package com.ibm.dltj;

import com.ibm.dltj.UniLexAnalyzer;
import com.ibm.dltj.gloss.CatNameGloss;
import com.ibm.dltj.gloss.FeatureSetGloss;
import com.ibm.dltj.gloss.GlossPool;
import com.ibm.dltj.gloss.IntegerGloss;
import com.ibm.dltj.gloss.MidGloss;
import com.ibm.dltj.gloss.TokenClassGloss;
import com.ibm.dltj.gloss.ZhLemmaGloss;
import com.ibm.dltj.parser.ParsingStream;
import com.ibm.dltj.util.FileUtils;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Collection;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.TreeSet;

/* loaded from: input_file:dlt.jar:com/ibm/dltj/UniLexAnalyzerC.class */
final class UniLexAnalyzerC extends UniLexAnalyzerEu {
    /** Language code returned by getClassificationResult() when no dictionary language is accepted and complex-text votes do not dominate. */
    private static final String UNKNOWN_SIMPLE_TEXT_LANG = "STL";
    /** Language code returned by getClassificationResult() when no dictionary language is accepted and complex-text votes dominate. */
    private static final String UNKNOWN_COMPLEX_TEXT_LANG = "CTL";
    /** Selector value for which loadTable() reads the "jfrost_confThres4Lang" entry. */
    private static final int CONF_THRES_TABLE = 1;
    /** Selector value for which loadTable() reads the "jfrost_NormFactor4Lang" entry. */
    private static final int NORM_FACTOR_TABLE = 2;
    /**
     * Bit mask tested against FeatureSetGloss.getBOFA() in processMatches().
     * Values assigned in this class: 0 (idle), 1 (plain parsed gloss),
     * 2 (unknown span that already has matches), 8 (reverse dictionary lookup).
     */
    private int matchCondition;
    /** True when the system property "dlt.log.doOutput" equals "1"; enables the debug trace. */
    private static final boolean DEBUG = "1".equals(System.getProperty("dlt.log.doOutput"));
    /** When true, getClassificationResult() keeps only guesses passing isConfident4Tokens() and may fall back to STL/CTL votes. */
    private boolean doUnk;
    /** Percentage gap below which the second-ranked guess is also marked confident; -1 disables the tie check. */
    private int tieDelta;
    /** Debug trace buffer; only allocated by open() when DEBUG is true. */
    private StringBuilder logsb;
    /** Language name to its running guess; rebuilt by open(), emptied by close(). */
    private TreeMap<String, LanguageGuess> lmap;
    /** Number of TAG_TOKEN tokens that isComplexText() classified as complex text. */
    private int unkCTLvotes;
    /** Number of TAG_TOKEN tokens that isComplexText() classified as not complex. */
    private int unkSTLvotes;
    /** Number of updateCounter() calls since the last reset(). */
    private int tokenCount;
    /** Sum of the span lengths passed to updateCounter() since the last reset(). */
    private int characterCount;
    /** Caller-supplied cap used by doProcessText(); only values greater than 0 take effect (constructor default is -1). */
    private int maxCharsToExamine;
    /** Effective character budget for the current text, computed in doProcessText() and checked in doDictLookup(). */
    private int upperLimit;

    /**
     * Returns the IBM copyright notice embedded in this class.
     *
     * @return the notice text, surrounded by two newlines on each side
     */
    static String getCopyright() {
        final String notice = "\n\n(C) Copyright IBM Corp. 2003, 2010.\n\n";
        return notice;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Reports the configured character cap.
     *
     * @return the value last stored by {@code setMaxCharsToExamine}, or the
     *         constructor default of -1
     */
    public int getMaxCharsToExamine() {
        return maxCharsToExamine;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Stores the character cap that {@code doProcessText} consults.
     *
     * @param i the new cap; {@code doProcessText} only applies it when it is
     *          greater than 0 and smaller than the text's end index
     */
    public void setMaxCharsToExamine(int i) {
        maxCharsToExamine = i;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Reports the tie threshold used by {@code getClassificationResult}.
     *
     * @return the stored threshold; -1 means the tie check is skipped
     */
    public int getTieDelta() {
        return tieDelta;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Stores the tie threshold used by {@code getClassificationResult}.
     *
     * @param i the new threshold; -1 switches the tie check off
     */
    public void setTieDelta(int i) {
        tieDelta = i;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Sets the flag that selects which branch {@code getClassificationResult}
     * takes (confidence-filtered with STL/CTL fallback when {@code true}).
     *
     * @param z the new flag value
     */
    public void doUnknown(boolean z) {
        doUnk = z;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Looks up the confidence threshold registered for a language.
     *
     * @param str the language key as stored in the language map
     * @return the threshold of that language's guess, or -1 when the key is
     *         not present in the map
     */
    public int getConfidenceThreshold(String str) {
        return lmap.containsKey(str) ? lmap.get(str).getConfThres() : -1;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Replaces the confidence threshold registered for a language.
     *
     * @param str the language key as stored in the language map
     * @param i   the new threshold
     * @return the threshold that was in effect before the call, or -1 when
     *         the key is not present in the map (nothing is changed then)
     */
    public int setConfidenceThreshold(String str, int i) {
        if (lmap.containsKey(str)) {
            LanguageGuess guess = lmap.get(str);
            int previous = guess.getConfThres();
            guess.setConfThres(i);
            return previous;
        }
        return -1;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Looks up the normalisation factor registered for a language.
     *
     * @param str the language key as stored in the language map
     * @return the factor of that language's guess, or -1 when the key is not
     *         present in the map
     */
    public int getNormFactor(String str) {
        return lmap.containsKey(str) ? lmap.get(str).getNormFactor() : -1;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Replaces the normalisation factor registered for a language.
     *
     * @param str the language key as stored in the language map
     * @param i   the new normalisation factor
     * @return the factor that was in effect before the call, or -1 when the
     *         key is not present in the map (nothing is changed then)
     */
    public int setNormFactor(String str, int i) {
        if (!this.lmap.containsKey(str)) {
            return -1;
        }
        LanguageGuess languageGuess = this.lmap.get(str);
        int normFactor = languageGuess.getNormFactor();
        // Must update the normalisation factor itself; writing the confidence
        // threshold here would silently corrupt an unrelated setting.
        languageGuess.setNormFactor(i);
        return normFactor;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates the analyzer and puts every tunable into its default state:
     * unknown-language handling on, tie check off (-1), no character cap (-1),
     * and all per-text counters at zero.
     *
     * @param str passed unchanged to the superclass constructor
     * @throws DLTException if the superclass constructor throws it
     */
    public UniLexAnalyzerC(String str) throws DLTException {
        super(str);
        // Tunables.
        doUnk = true;
        tieDelta = -1;
        maxCharsToExamine = -1;
        // Per-text counters.
        tokenCount = 0;
        characterCount = 0;
        upperLimit = 0;
    }

    /**
     * Opens the analyzer and rebuilds the language map.
     *
     * After delegating to the superclass, every gloss found in pool number 19
     * of each dictionary becomes a fresh {@code LanguageGuess} (created with
     * 0, 0 as its remaining constructor arguments) keyed by its language.
     * Both per-language tables are then loaded, and the debug buffer is
     * allocated when debugging is enabled.
     *
     * @param dictionaryArr forwarded to the superclass
     * @param i             forwarded to the superclass
     * @param i2            forwarded to the superclass
     * @param i3            forwarded to the superclass
     * @throws DLTException if the superclass {@code open} throws it
     */
    @Override // com.ibm.dltj.UniLexAnalyzerEu, com.ibm.dltj.UniLexAnalyzer
    public void open(Dictionary[] dictionaryArr, int i, int i2, int i3) throws DLTException {
        super.open(dictionaryArr, i, i2, i3);
        lmap = new TreeMap<>();
        for (int dictIdx = 0; dictIdx < morphoSyntax.dictionaries.length; dictIdx++) {
            GlossPool pool = morphoSyntax.dictionaries[dictIdx].getPoolByNumber(19);
            if (pool == null) {
                continue;
            }
            for (Iterator<Gloss> glosses = pool.getGlossIterator(); glosses.hasNext();) {
                CatNameGloss nameGloss = (CatNameGloss) glosses.next();
                LanguageGuess guess = new LanguageGuess(nameGloss.value, 0, 0);
                lmap.put(guess.getLanguage(), guess);
            }
        }
        // Return codes are intentionally ignored, as a missing table is not fatal here.
        loadTable(1);
        loadTable(2);
        if (DEBUG) {
            logsb = new StringBuilder(256);
        }
    }

    /**
     * Loads one per-language tuning table from the dictionaries into the
     * language map.
     *
     * The table is stored under a fixed key ("jfrost_confThres4Lang" for
     * selector 1, "jfrost_NormFactor4Lang" for selector 2). Each
     * {@code MidGloss} attached to the full-length match pairs a language
     * name with an integer weight; the weight is written to that language's
     * confidence threshold (selector 1) or normalisation factor (selector 2).
     * Entries that lack either gloss, or that name a language missing from
     * the language map, are skipped instead of raising a
     * {@code NullPointerException}.
     *
     * @param i table selector: 1 for confidence thresholds, 2 for
     *          normalisation factors
     * @return -1 for an unsupported selector, 0 as soon as one dictionary
     *         does not match the whole key, 1 otherwise
     */
    private int loadTable(int i) {
        final String tableKey;
        switch (i) {
            case 1:
                tableKey = "jfrost_confThres4Lang";
                break;
            case 2:
                tableKey = "jfrost_NormFactor4Lang";
                break;
            default:
                return -1;
        }
        // NOTE(review): the iterator is shared across dictionaries without being
        // rewound, exactly as before; confirm lookupLongest() does not rely on
        // the current index.
        StringCharacterIterator stringCharacterIterator = new StringCharacterIterator(tableKey);
        for (int i2 = 0; i2 < this.morphoSyntax.dictionaries.length; i2++) {
            this.morphoSyntax.matches.clear();
            this.morphoSyntax.dictionaries[i2].lookupLongest(stringCharacterIterator, this.morphoSyntax.matches);
            GlossCollection[] glossCollectionArr = this.morphoSyntax.matches.gloss;
            boolean wholeKeyMatched = this.morphoSyntax.matches.getMaxIndex() == stringCharacterIterator.getEndIndex();
            if (!wholeKeyMatched || this.morphoSyntax.matches.mpos < 1) {
                // Do not leave a partial match behind in the shared match buffer.
                this.morphoSyntax.matches.clear();
                return 0;
            }
            Iterator<Gloss> it = glossCollectionArr[this.morphoSyntax.matches.mpos - 1].iterator();
            while (it.hasNext()) {
                Gloss next = it.next();
                if (!(next instanceof MidGloss)) {
                    continue;
                }
                MidGloss midGloss = (MidGloss) next;
                CatNameGloss catNameGloss = (CatNameGloss) midGloss.getGloss(CatNameGloss.class);
                IntegerGloss integerGloss = (IntegerGloss) midGloss.getGloss(IntegerGloss.class);
                if (catNameGloss == null || integerGloss == null) {
                    continue;
                }
                LanguageGuess languageGuess = this.lmap.get(catNameGloss.value);
                if (languageGuess == null) {
                    // The table mentions a language that open() did not register.
                    continue;
                }
                if (i == 1) {
                    languageGuess.setConfThres(integerGloss.weight);
                } else {
                    languageGuess.setNormFactor(integerGloss.weight);
                }
            }
        }
        this.morphoSyntax.matches.clear();
        return 1;
    }

    /**
     * Clears all per-text state so that a new text can be classified.
     *
     * Every language score is set back to zero, and the token, character and
     * limit counters are zeroed. The simple/complex "unknown language" vote
     * counters are zeroed as well, so that votes collected for one text do
     * not leak into the fallback result of the next one. Calling this before
     * {@code open} (language map not built yet) is a harmless no-op for the
     * scores.
     */
    public void reset() {
        if (this.lmap != null) {
            for (LanguageGuess languageGuess : this.lmap.values()) {
                languageGuess.setScore(0L);
            }
        }
        this.tokenCount = 0;
        this.characterCount = 0;
        this.upperLimit = 0;
        this.unkCTLvotes = 0;
        this.unkSTLvotes = 0;
    }

    /**
     * Empties the language map, if one was built, and then closes the
     * superclass.
     *
     * @throws DLTException if the superclass {@code close} throws it
     */
    @Override // com.ibm.dltj.UniLexAnalyzer
    public void close() throws DLTException {
        TreeMap<String, LanguageGuess> guesses = lmap;
        if (guesses != null) {
            guesses.clear();
        }
        super.close();
    }

    /**
     * Runs a reversed longest-match lookup against the dictionaries, starting
     * with the dictionary at {@code dictLookupPos.dictIdx}.
     *
     * The character iterator is rewound to its index at entry before each
     * dictionary is consulted. The scan stops after the first dictionary that
     * produced matches when the collision policy ({@code cmPolicy}) equals 1;
     * otherwise it continues through all remaining dictionaries. In either
     * case {@code dictIdx} ends up one past the last dictionary consulted.
     *
     * @param dictLookupPos cursor holding the next dictionary index; advanced
     *                      in place
     * @return the number of matches held by the shared match buffer on exit
     */
    protected int reverseDictLookup(UniLexAnalyzer.DictLookupPos dictLookupPos) {
        final int startPos = characterIterator.getIndex();
        while (dictLookupPos.dictIdx < morphoSyntax.dictionaries.length) {
            characterIterator.setIndex(startPos);
            morphoSyntax.dictionaries[dictLookupPos.dictIdx].lookupLongestReversed(characterIterator, morphoSyntax.matches);
            boolean stopHere = morphoSyntax.matches.getNumMatches() != 0 && morphoSyntax.cmPolicy == 1;
            dictLookupPos.dictIdx++;
            if (stopHere) {
                break;
            }
        }
        return morphoSyntax.matches.getNumMatches();
    }

    /**
     * Accounts for one more token covering the span {@code [i, i2)}.
     *
     * @param i  start index of the span
     * @param i2 end index of the span; {@code i2 - i} is added to the
     *           character count
     */
    private void updateCounter(int i, int i2) {
        final int spanLength = i2 - i;
        tokenCount += 1;
        characterCount += spanLength;
        if (DEBUG) {
            // Hands the span to the inherited memcpy helper together with the debug buffer.
            memcpy(logsb, characterIterator, i, i2);
        }
    }

    /**
     * Resets the per-text state, derives the character budget for this text
     * and then lets the superclass process it.
     *
     * The budget is the text's end index, lowered to
     * {@code maxCharsToExamine} when that cap is positive and smaller.
     *
     * @param characterIterator the text to analyse
     * @return whatever the superclass {@code doProcessText} returns
     * @throws DLTException if the superclass throws it
     */
    @Override // com.ibm.dltj.UniLexAnalyzerEu, com.ibm.dltj.UniLexAnalyzer
    public ParsingStream doProcessText(CharacterIterator characterIterator) throws DLTException {
        reset();
        final int textEnd = characterIterator.getEndIndex();
        final boolean capApplies = maxCharsToExamine > 0 && maxCharsToExamine < textEnd;
        upperLimit = capApplies ? maxCharsToExamine : textEnd;
        return super.doProcessText(characterIterator);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Performs the superclass dictionary lookup, then enforces the character
     * budget.
     *
     * Once more characters have been counted than the budget allows, the
     * start index is moved to the end of the text and {@code false} is
     * returned regardless of the superclass result.
     *
     * @return the superclass result while within budget, otherwise
     *         {@code false}
     */
    @Override // com.ibm.dltj.UniLexAnalyzerEu
    public boolean doDictLookup() {
        final boolean found = super.doDictLookup();
        if (characterCount <= upperLimit) {
            return found;
        }
        startIndex = characterIterator.getEndIndex();
        return false;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Counts the span from the current start index to the match buffer's
     * maximum index as one token, then delegates to the superclass.
     */
    @Override // com.ibm.dltj.UniLexAnalyzerEu
    public void decomposeAndPost() {
        final int spanStart = startIndex;
        final int spanEnd = morphoSyntax.matches.getMaxIndex();
        updateCounter(spanStart, spanEnd);
        super.decomposeAndPost();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Records a vote for the "unknown language" fallback before handing the
     * gloss to the superclass.
     *
     * Only glosses for which {@code is(TokenClassGloss.TAG_TOKEN)} holds are
     * counted: the span is added to the token/character counters and one vote
     * goes to either the complex-text or the simple-text tally.
     *
     * @param i               start index of the span
     * @param i2              end index of the span
     * @param tokenClassGloss the token class gloss being inserted
     */
    @Override // com.ibm.dltj.UniLexAnalyzer
    public void insertTokenClassGloss(int i, int i2, TokenClassGloss tokenClassGloss) {
        final boolean countsAsToken = tokenClassGloss.is(TokenClassGloss.TAG_TOKEN);
        if (countsAsToken) {
            updateCounter(i, i2);
            if (!isComplexText(tokenClassGloss)) {
                unkSTLvotes += 1;
            } else {
                unkCTLvotes += 1;
            }
        }
        super.insertTokenClassGloss(i, i2, tokenClassGloss);
    }

    /**
     * Tells whether a token's best class lies in the inclusive range
     * 300..330, which this class counts as a complex-text vote.
     *
     * @param tokenClassGloss the gloss whose best class is inspected
     * @return {@code true} when the best class is within 300..330
     */
    private boolean isComplexText(TokenClassGloss tokenClassGloss) {
        final int tokenClass = tokenClassGloss.getBestClass();
        return !(tokenClass < 300 || tokenClass > 330);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Scores an unknown span; see {@code scoreUnknownSpan} for the procedure.
     *
     * @param i  start index of the span
     * @param i2 end index of the span
     * @param i3 not used by this implementation
     */
    @Override // com.ibm.dltj.UniLexAnalyzer
    public final void insertUnknown(int i, int i2, int i3) {
        scoreUnknownSpan(i, i2);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Scores an unknown word; handled exactly like {@code insertUnknown}.
     *
     * @param i  start index of the word
     * @param i2 end index of the word
     * @param i3 not used by this implementation
     */
    @Override // com.ibm.dltj.UniLexAnalyzer
    public final void insertUnknownWord(int i, int i2, int i3) {
        scoreUnknownSpan(i, i2);
    }

    /**
     * Shared implementation of {@code insertUnknown} and
     * {@code insertUnknownWord}.
     *
     * If the match buffer already holds matches, their glosses are inserted
     * at {@code start} under match condition 2. Otherwise a reverse
     * dictionary lookup is run from {@code end}; when it yields matches and
     * the current start index lies before the last match's index, the glosses
     * are inserted at the current start index under match condition 8. The
     * match condition is always restored to 0, even if inserting throws, and
     * after the reverse path the character iterator is left at {@code end}.
     *
     * @param start start index of the unknown span
     * @param end   end index of the unknown span
     */
    private void scoreUnknownSpan(int start, int end) {
        if (this.morphoSyntax.matches.mpos > 0) {
            this.matchCondition = 2;
            try {
                insertGlosses(start);
            } finally {
                this.matchCondition = 0;
            }
            return;
        }
        this.dictPos.dictIdx = 0;
        this.morphoSyntax.matches.clear();
        this.morphoSyntax.matches.setMaxIndex(Integer.MAX_VALUE);
        this.characterIterator.setIndex(end);
        if (reverseDictLookup(this.dictPos) > 0) {
            this.matchCondition = 8;
            try {
                if (this.startIndex < this.morphoSyntax.matches.index[this.morphoSyntax.matches.mpos - 1]) {
                    insertGlosses(this.startIndex);
                }
            } finally {
                this.matchCondition = 0;
            }
        }
        this.characterIterator.setIndex(end);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Scores the glosses of a parsed span.
     *
     * If no match condition is active (value 0) the plain-match condition 1
     * is used; an already active condition is kept. The condition is set
     * back to 0 once the glosses have been processed.
     *
     * @param i               start index of the span
     * @param i2              end index of the span
     * @param glossCollection glosses attached to the span
     */
    @Override // com.ibm.dltj.UniLexAnalyzer
    public final void insertParsedGloss(int i, int i2, GlossCollection glossCollection) {
        matchCondition = (matchCondition != 0) ? matchCondition : 1;
        processMatches(i, i2, glossCollection);
        matchCondition = 0;
    }

    /**
     * Adds the weights carried by a span's glosses to the matching language
     * scores.
     *
     * Only {@code MidGloss} entries are considered, and only when their
     * feature gloss exists and its BOFA bits intersect the current match
     * condition. Such an entry must carry both a language name and an integer
     * weight, and the language must be present in the language map; entries
     * that fail any of these checks are skipped. When debugging is enabled
     * and at least one score changed, the trace buffer is printed and
     * optionally appended to a file.
     *
     * @param i               start index of the span (not used here)
     * @param i2              end index of the span (not used here)
     * @param glossCollection glosses attached to the span
     */
    private final void processMatches(int i, int i2, GlossCollection glossCollection) {
        boolean scored = false;
        Iterator<Gloss> it = glossCollection.iterator();
        while (it.hasNext()) {
            Gloss next = it.next();
            if (!(next instanceof MidGloss)) {
                continue;
            }
            MidGloss midGloss = (MidGloss) next;
            FeatureSetGloss featureGloss = midGloss.getFeatureGloss();
            if (featureGloss == null) {
                continue;
            }
            int bofa = featureGloss.getBOFA();
            // A zero match condition can never intersect, so one test covers both cases.
            if ((bofa & this.matchCondition) == 0) {
                continue;
            }
            CatNameGloss catNameGloss = (CatNameGloss) midGloss.getGloss(CatNameGloss.class);
            IntegerGloss integerGloss = (IntegerGloss) midGloss.getGloss(IntegerGloss.class);
            if (catNameGloss == null || integerGloss == null) {
                continue;
            }
            LanguageGuess languageGuess = this.lmap.get(catNameGloss.value);
            if (languageGuess == null) {
                // The dictionary names a language that open() did not register.
                continue;
            }
            languageGuess.setScore(languageGuess.getScore() + integerGloss.weight);
            if (DEBUG) {
                scored = true;
                if ((this.matchCondition & 8) != 0) {
                    this.logsb.append("[reverse]");
                }
                this.logsb.append("(").append(catNameGloss.value).append(",").append(integerGloss.weight).append(")");
            }
        }
        if (DEBUG && scored) {
            emitDebugTrace(this.logsb.toString());
        }
    }

    /**
     * Prints one debug trace line to standard output and, when the system
     * property "dlt.debug.out" is non-empty, appends it (UTF-8, followed by a
     * line separator) to the file named by that property. I/O failures are
     * reported via a stack trace and otherwise ignored, because tracing is
     * best-effort.
     *
     * @param line the trace text to emit
     */
    private void emitDebugTrace(String line) {
        System.out.println(line);
        String property = System.getProperty("dlt.debug.out", ZhLemmaGloss.ZHLEMMA_SAME);
        if (property.length() == 0) {
            return;
        }
        try (FileOutputStream fileStream = new FileOutputStream(property, true);
                OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileStream, "utf-8")) {
            outputStreamWriter.write(line);
            outputStreamWriter.write(FileUtils.LINE_SEPARATOR);
            outputStreamWriter.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Builds the ranked classification result for the text processed last.
     *
     * All language guesses are sorted by their natural ordering and walked in
     * that order until the first guess with a zero score. With
     * unknown-language handling enabled, only guesses that are confident for
     * the current token count are accepted; otherwise every guess before the
     * zero score is accepted.
     *
     * If nothing is accepted, a single synthetic guess is returned: with
     * unknown-language handling enabled it is "CTL" when complex-text votes
     * outnumber simple-text votes and "STL" otherwise, carrying the
     * respective vote count; with it disabled it is "STL" with a count of 0.
     *
     * Otherwise the accepted guesses are returned in sorted order and the
     * first one gets confidence 1. When the tie threshold is not -1 and there
     * are at least two guesses, the second one also gets confidence 1 if the
     * gap between the two normalised scores, as a percentage of the second
     * score, is below the threshold.
     *
     * NOTE(review): the decompiled tie expression referenced an undeclared
     * register; it is reconstructed here as the runner-up's normalised score,
     * so confirm against the original sources if available.
     *
     * @return a non-empty array of guesses, best first
     */
    public LanguageGuess[] getClassificationResult() {
        TreeSet<LanguageGuess> ranked = new TreeSet<>(this.lmap.values());
        TreeSet<LanguageGuess> accepted = new TreeSet<>();
        for (LanguageGuess candidate : ranked) {
            if (candidate.getScore() == 0) {
                // Relies on the ordering placing zero-score guesses last.
                break;
            }
            if (!this.doUnk || candidate.isConfident4Tokens(this.tokenCount)) {
                accepted.add(candidate);
            }
        }

        if (accepted.isEmpty()) {
            if (this.doUnk && this.unkCTLvotes > this.unkSTLvotes) {
                return new LanguageGuess[]{new LanguageGuess(UNKNOWN_COMPLEX_TEXT_LANG, this.unkCTLvotes, 1)};
            }
            int simpleVotes = this.doUnk ? this.unkSTLvotes : 0;
            return new LanguageGuess[]{new LanguageGuess(UNKNOWN_SIMPLE_TEXT_LANG, simpleVotes, 1)};
        }

        LanguageGuess[] result = accepted.toArray(new LanguageGuess[accepted.size()]);
        result[0].setConfidence(1);
        if (this.tieDelta != -1 && result.length > 1) {
            // Work in double so the ratio is neither truncated nor able to
            // raise ArithmeticException on a zero runner-up score.
            double best = result[0].getNormalizedScore();
            double runnerUp = result[1].getNormalizedScore();
            double gapPercent = (best == runnerUp) ? 0.0d : ((best - runnerUp) / runnerUp) * 100.0d;
            if (gapPercent < this.tieDelta) {
                result[1].setConfidence(1);
            }
        }
        return result;
    }
}
