/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.supa.annotator.annotators;

import com.ibm.supa.annotator.annotators.IBlockTitleClassifier;
import com.ibm.supa.annotator.util.text.VisualTextUtil;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Block-title classifier that adapts to the titles it has already accepted.
 *
 * <p>Each accepted title updates three pieces of state: how often the
 * punctuation that terminated a title has been seen, the set of canonical
 * (lower-cased, whitespace-normalized) titles, and running sums of the number
 * of tokens per title. Away from the start of a block, a candidate is accepted
 * only if it was seen before, or if its terminating punctuation has been seen
 * often enough and its token count is not unusually large compared with the
 * titles accepted so far.
 *
 * <p>This class performs no synchronization of its own.
 */
public class LearningBlockTitleClassiifier
implements IBlockTitleClassifier {
    /**
     * Candidate-title pattern, applied at the start of a line.
     * Group 1 (title text): a letter or underscore followed by any run of
     * non-punctuation characters or punctuation that is immediately followed
     * by a non-whitespace character.
     * Group 2 (terminator): either a punctuation character followed by
     * whitespace or end of input, or a single whitespace character other than
     * CR/LF.
     */
    private static final Pattern sPattern = Pattern.compile("([a-zA-Z_](?:\\p{Punct}\\S|[^\\p{Punct}])*)((?:(\\p{Punct})(?:\\s+|$))|([\\s&&[^\r\n]]))");

    /** Minimum number of prior sightings of a terminator before it may introduce an unseen title. */
    private final int mMinTimesToSeeRuleBeforeMarkingNewBlockTitle;
    /** Terminator (trimmed, possibly empty) mapped to the number of accepted titles that ended with it. */
    private final HashMap<String, Integer> mPunctuationRuleCount;
    /** Canonical forms of every accepted title. */
    private final HashSet<String> mTitles;
    /** Number of accepted titles (repeats of the same title are counted each time). */
    private int mNumTitles;
    /** Sum of squared token counts over accepted titles. */
    private long mNumTokenSqrSum;
    /** Sum of token counts over accepted titles. */
    private long mNumTokenSum;
    /** How many standard deviations above the mean token count a new title may be. */
    private final double mNumTokenStdMult = 3.0;

    /**
     * Creates a classifier with no learned state.
     *
     * @param minTimesToSeeRuleBeforeMarkingNewBlock minimum number of times a
     *        terminating punctuation must already have been recorded before it
     *        can qualify a previously unseen title away from a block start;
     *        {@code 0} lets never-seen punctuation qualify
     */
    public LearningBlockTitleClassiifier(int minTimesToSeeRuleBeforeMarkingNewBlock) {
        this.mMinTimesToSeeRuleBeforeMarkingNewBlockTitle = minTimesToSeeRuleBeforeMarkingNewBlock;
        this.mPunctuationRuleCount = new HashMap<String, Integer>();
        this.mTitles = new HashSet<String>();
    }

    /** Forgets how often each terminating punctuation has been seen. */
    public void clearRuleCounts() {
        this.mPunctuationRuleCount.clear();
    }

    /**
     * Clears the punctuation rule counts.
     *
     * <p>The learned title set and the token-count statistics are left
     * untouched. NOTE(review): confirm that retaining them across a reset is
     * intended.
     */
    public void reset() {
        this.clearRuleCounts();
    }

    /**
     * Decides whether {@code text} begins with a block title and, if so,
     * records the title in the learned state.
     *
     * @param text the text to inspect; only the substring returned by
     *        {@code VisualTextUtil.getFirstLineSubstring} is examined
     *        (presumably its first line - verify against that helper)
     * @param atStartOfBlock whether the text is positioned at the start of a
     *        block; block starts use a lenient rule that does not consult the
     *        learned state
     * @return the length of the trimmed title text as it appears in the input,
     *         or {@code -1} when no title is recognized
     */
    public int getBlockTitleLength(String text, boolean atStartOfBlock) {
        String line = VisualTextUtil.getFirstLineSubstring(text);
        Matcher matcher = sPattern.matcher(line);
        if (!matcher.lookingAt()) {
            return -1;
        }

        String rawTitle = matcher.group(1).trim();
        String title = LearningBlockTitleClassiifier.makeCanonical(rawTitle);
        int numTokensInTitle = new StringTokenizer(title).countTokens();
        // Group 2 is mandatory in the pattern, so it is non-null after a match.
        String punc = matcher.group(2).trim();

        boolean matchFound;
        if (atStartOfBlock) {
            matchFound = this.isTitleAtBlockStart(line, punc, numTokensInTitle);
        } else {
            matchFound = this.mTitles.contains(title)
                    || (this.isPunctuationRuleTrusted(punc) && this.isTokenCountPlausible(numTokensInTitle));
        }
        if (!matchFound) {
            return -1;
        }

        this.recordTitle(title, punc, numTokensInTitle);
        return rawTitle.length();
    }

    /**
     * Block-start rule: any candidate is a title unless it ends with "." or
     * ",", in which case it must be at most two tokens long and be the only
     * content on the line.
     */
    private boolean isTitleAtBlockStart(String line, String punc, int numTokensInTitle) {
        if (punc.equals(".") || punc.equals(",")) {
            int numTotalTokens = new StringTokenizer(line).countTokens();
            return numTokensInTitle <= 2 && numTotalTokens == numTokensInTitle;
        }
        return true;
    }

    /**
     * Returns whether the terminator has been recorded often enough to
     * introduce an unseen title. An empty terminator is always trusted.
     */
    private boolean isPunctuationRuleTrusted(String punc) {
        if (punc.length() == 0) {
            return true;
        }
        Integer count = this.mPunctuationRuleCount.get(punc);
        int timesSeen = count == null ? 0 : count.intValue();
        return timesSeen >= this.mMinTimesToSeeRuleBeforeMarkingNewBlockTitle;
    }

    /**
     * Returns whether the token count is no more than the mean plus
     * {@code mNumTokenStdMult} sample standard deviations of the token counts
     * of the titles accepted so far. Requires at least two accepted titles.
     */
    private boolean isTokenCountPlausible(int numTokensInTitle) {
        if (this.mNumTitles < 2) {
            return false;
        }
        double n = this.mNumTitles;
        // Divide in floating point: integer division would truncate the mean
        // and thereby inflate the variance computed from it.
        double mean = (double)this.mNumTokenSum / n;
        double variance = n * ((double)this.mNumTokenSqrSum / n - mean * mean) / (n - 1.0);
        // Rounding can push a zero variance marginally negative; sqrt of that
        // would be NaN and make the comparison below fail unconditionally.
        double std = Math.sqrt(Math.max(0.0, variance));
        return (double)numTokensInTitle <= mean + this.mNumTokenStdMult * std;
    }

    /** Folds an accepted title into the rule counts, title set and token statistics. */
    private void recordTitle(String title, String punc, int numTokensInTitle) {
        Integer count = this.mPunctuationRuleCount.get(punc);
        int updated = count == null ? 1 : count.intValue() + 1;
        this.mPunctuationRuleCount.put(punc, Integer.valueOf(updated));
        this.mTitles.add(title);
        ++this.mNumTitles;
        this.mNumTokenSum += (long)numTokensInTitle;
        // Widen before multiplying so the square cannot overflow int.
        this.mNumTokenSqrSum += (long)numTokensInTitle * (long)numTokensInTitle;
    }

    /**
     * Produces the canonical form of a title: whitespace-delimited tokens
     * joined by single spaces and lower-cased with the locale-independent
     * root locale, so the result does not depend on the JVM default locale.
     */
    private static String makeCanonical(String title) {
        StringBuilder sBuf = new StringBuilder();
        StringTokenizer sTok = new StringTokenizer(title);
        while (sTok.hasMoreTokens()) {
            if (sBuf.length() > 0) {
                sBuf.append(' ');
            }
            sBuf.append(sTok.nextToken());
        }
        return sBuf.toString().toLowerCase(Locale.ROOT);
    }
}

