/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.supa.search.queries;

import com.ibm.es.nuvo.GlobalSystem;
import com.ibm.es.nuvo.tokenizer.BaseTagToken;
import com.ibm.es.nuvo.tokenizer.NormalizedResultHandler;
import com.ibm.es.nuvo.tokenizer.ResultHandler;
import com.ibm.es.nuvo.tokenizer.TToken;
import com.ibm.es.nuvo.tokenizer.Tokenizer;
import com.ibm.es.nuvo.tokenizer.TokenizerException;
import com.ibm.es.nuvo.tokenizer.TokenizerProcessInput;
import com.ibm.es.nuvo.tokenizer.XMLToken;
import com.ibm.siapi.SiapiException;
import com.ibm.supa.common.text.TextUtils;
import com.ibm.supa.search.SearchWrapper;
import com.ibm.supa.search.queries.QueryBuilder;
import com.ibm.supa.tokenizers.TokenizationParams;
import com.ibm.supa.tokenizers.TokenizationUtils;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DefaultSimilarity;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class SearchSelector {
    // Per-form weights for index terms.
    // NOTE(review): none of these four constants is referenced by name in this file; the literals
    // 5.0 / 3.0 / 2.0 / 1.0 used in getInterestingWords() and getTfIdfs() appear to be inlined copies
    // (regular form, lemma, subword, other subword-form) — confirm before changing either side.
    private static final double REG_WEIGHT = 5.0;
    private static final double LEMMA_WEIGHT = 3.0;
    private static final double SW_WEIGHT = 2.0;
    private static final double SW_LEMMA_WEIGHT = 1.0;
    // Multiplier applied to reader.maxDoc() before it is handed to Similarity.idf().
    // NOTE(review): 11.0 equals the sum of the four weights above — presumably intentional, confirm.
    private static final double NORM_WEIGHT = Math.sqrt(11.0);
    // Raw query string passed to the constructor; unescaped and tokenized by buildQueryTerms().
    String query;
    // Search collections: calcFreq() sums document frequencies over all of them, while
    // buildQueryTerms() uses only the first one for tokenization.
    SearchWrapper[] wrappers;
    // Threshold checked by sortFreqs() to stop adding further frequency buckets.
    int termVectorSize;
    // Negated summed document frequency -> distinct tokens having that frequency (filled by calcFreq()).
    HashMap<Integer, ArrayList<String>> freqMap = new HashMap();
    // Final token list produced by sortFreqs(); every entry carries a trailing "?".
    ArrayList<String> sortedTokens = new ArrayList();

    /**
     * Creates a selector for {@code query} and eagerly computes the token list that
     * {@link #getSortedTokens()} returns.
     *
     * @param query          query text to tokenize
     * @param wrappers       search collections; the first one is used for tokenization, all of them
     *                       for document-frequency lookups
     * @param termVectorSize threshold used by {@code sortFreqs()} to stop collecting tokens
     * @throws SiapiException declared by the tokenization / index-access calls
     * @throws IOException    declared by the index document-frequency lookup
     */
    public SearchSelector(String query, SearchWrapper[] wrappers, int termVectorSize) throws SiapiException, IOException {
        this.termVectorSize = termVectorSize;
        this.wrappers = wrappers;
        this.query = query;
        // Pipeline: tokenize the query, bucket the tokens by frequency, flatten the buckets.
        this.calcFreq(this.buildQueryTerms());
        this.sortFreqs();
    }

    /**
     * Returns the token list computed in the constructor.
     *
     * @return the internal list itself (not a copy); each token ends with {@code "?"}
     */
    public ArrayList<String> getSortedTokens() {
        return sortedTokens;
    }

    /**
     * Groups the distinct query tokens by document frequency and stores the groups in
     * {@code freqMap}.
     *
     * <p>The key of each group is the <em>negated</em> sum of the token's document frequency in the
     * {@code "_plain"} field over all wrappers, so ascending key order means descending frequency.
     * Each distinct token is looked up in the indexes exactly once.
     *
     * @param wc token collections produced by {@link #buildQueryTerms()}
     * @throws SiapiException declared by the wrapper's index-reader access
     * @throws IOException    declared by the index document-frequency lookup
     */
    void calcFreq(QueryBuilder.WorkCollections wc) throws SiapiException, IOException {
        ArrayList<String> tokens = new ArrayList<String>();
        tokens.addAll(wc.regularTokens);
        tokens.addAll(wc.lemmaTokens);
        // NOTE(review): buildQueryTerms() fills wc.subTokens, but this reads wc.subsubTokens —
        // confirm which collection is intended; left unchanged here.
        tokens.addAll(wc.subsubTokens);
        HashSet<String> seen = new HashSet<String>();
        for (String token : tokens) {
            // Skip duplicates before touching the indexes; only the first occurrence is bucketed.
            if (!seen.add(token)) {
                continue;
            }
            int freq = 0;
            for (SearchWrapper wrapper : this.wrappers) {
                // Subtracting yields a negative key: more frequent tokens get smaller keys.
                freq -= wrapper.getTextIndexReader().docFreq(new Term("_plain", token));
            }
            Integer key = Integer.valueOf(freq);
            ArrayList<String> bucket = this.freqMap.get(key);
            if (bucket == null) {
                bucket = new ArrayList<String>();
                this.freqMap.put(key, bucket);
            }
            bucket.add(token);
        }
    }

    /**
     * Tokenizes the stored query and sorts the resulting terms into the three collections of a
     * fresh {@code WorkCollections}.
     *
     * <p>Each token is normalized, reduced to its letters and digits, and dropped if nothing
     * remains. Subword forms go to {@code subTokens} (converted with
     * {@code makeNonSubWordForm}), lemmas to {@code lemmaTokens}, everything else to
     * {@code regularTokens}. Tokenization uses the first wrapper only.
     *
     * @return the populated collections
     * @throws SiapiException declared by the query-builder calls
     */
    public QueryBuilder.WorkCollections buildQueryTerms() throws SiapiException {
        QueryBuilder.WorkCollections collected = new QueryBuilder.WorkCollections();
        collected.regularTokens.clear();
        collected.lemmaTokens.clear();
        collected.subTokens.clear();
        String unescaped = ((Object)TextUtils.slashUnescape(this.query)).toString();
        QueryBuilder builder = new QueryBuilder(this.wrappers[0], null);
        // Scratch buffer reused for every token; removeNotAlphaNum() empties it after each call.
        StringBuilder scratch = new StringBuilder();
        List<TToken> rawTokens = builder.tokenize(unescaped);
        for (TToken raw : rawTokens) {
            String cleaned = this.removeNotAlphaNum(builder.normalize(raw), scratch);
            if (cleaned.length() > 0) {
                if (TokenizationUtils.isSubwordForm(cleaned)) {
                    collected.subTokens.add(TokenizationUtils.makeNonSubWordForm(cleaned));
                } else if (TokenizationUtils.isLemma(cleaned)) {
                    collected.lemmaTokens.add(cleaned);
                } else {
                    collected.regularTokens.add(cleaned);
                }
            }
        }
        return collected;
    }

    /**
     * Returns {@code text} with every character that is not a letter or digit removed.
     *
     * <p>The text is scanned by Unicode code point rather than by {@code char}, so letters and
     * digits outside the Basic Multilingual Plane (stored as surrogate pairs) are kept instead of
     * being silently dropped. Unpaired surrogates are still removed.
     *
     * @param text input text; must not be {@code null}
     * @param buff scratch buffer the kept characters are appended to; any content already in it
     *             becomes a prefix of the result, and it is emptied before this method returns
     * @return the buffer content after appending the kept characters
     */
    public String removeNotAlphaNum(String text, StringBuilder buff) {
        final int length = text.length();
        int index = 0;
        while (index < length) {
            final int codePoint = text.codePointAt(index);
            if (Character.isLetterOrDigit(codePoint)) {
                buff.appendCodePoint(codePoint);
            }
            // Advance by 1 for BMP characters, 2 for a surrogate pair.
            index += Character.charCount(codePoint);
        }
        String result = buff.toString();
        buff.setLength(0);
        return result;
    }

    /**
     * Flattens {@code freqMap} into {@code sortedTokens}, bucket by bucket, in ascending key order.
     *
     * <p>Keys are negated document frequencies (see {@code calcFreq}), so ascending key order puts
     * the most frequent tokens first. The keys are sorted in a list that is then iterated; the
     * earlier code sorted a temporary array and iterated the unsorted list. Each bucket is passed
     * through {@link #makeOptional(ArrayList)} before being appended.
     *
     * <p>Buckets are appended whole, and a further bucket is added as long as the number of
     * collected tokens does not exceed {@code termVectorSize}, so the result may be longer than
     * that threshold.
     * NOTE(review): the check is {@code >}, not {@code >=} — confirm whether the threshold is
     * meant to be inclusive.
     */
    void sortFreqs() {
        ArrayList<Integer> freqList = new ArrayList<Integer>(this.freqMap.keySet());
        Collections.sort(freqList);
        int collected = 0;
        for (Integer freq : freqList) {
            if (collected > this.termVectorSize) {
                break;
            }
            this.sortedTokens.addAll(this.makeOptional(this.freqMap.get(freq)));
            collected = this.sortedTokens.size();
        }
    }

    /**
     * Appends {@code "?"} to every element of {@code tokens}, in place.
     *
     * <p>Elements are replaced with {@code set}, so the pass is linear and does not shift the
     * list's tail on every step.
     *
     * @param tokens list to modify; must not be {@code null}
     * @return the same list instance that was passed in
     */
    ArrayList<String> makeOptional(ArrayList<String> tokens) {
        for (int i = 0, size = tokens.size(); i < size; ++i) {
            tokens.set(i, tokens.get(i) + "?");
        }
        return tokens;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Ranks the tokens of {@code text} by a tf-idf style score and returns the best ones.
     *
     * <p>For every wrapper the text is run through that collection's tokenizer. Each
     * non-whitespace token increments its term frequency. The first time a token is seen for a
     * wrapper, its document frequency is computed as the weighted average over the forms produced
     * by the query builder, and the squared idf is added to the token's {@code idf} field. The
     * final score is {@code -tf * idf * idf}, sorted ascending, so the highest-scoring tokens
     * come first.
     *
     * @param text     text to analyze
     * @param wrappers search collections to consult
     * @param numTerms maximum number of tokens to return; values {@code <= 0} give an empty array
     * @return at most {@code numTerms} token texts, best first
     * @throws TokenizerException declared by the tokenizer calls
     * @throws SiapiException     declared by the search-wrapper / query-builder calls
     */
    public static String[] getInterestingWords(String text, SearchWrapper[] wrappers, int numTerms) throws TokenizerException, SiapiException {
        final HashMap<String, TfIdf> lookup = new HashMap<String, TfIdf>();
        final DefaultSimilarity similarity = new DefaultSimilarity();
        final HashSet<String> testedTokens = new HashSet<String>();
        TokenizerProcessInput tpi = new TokenizerProcessInput(text, null, null);
        tpi.setStartOfAttributeValues(-1);
        for (SearchWrapper wrapper : wrappers) {
            tpi.setTokenizationParams(new TokenizationParams(false, wrapper.getCollectionId(), null, null, "en", null));
            final IndexReader reader = wrapper.getTextIndexReader();
            final QueryBuilder builder = new QueryBuilder(wrapper, null);
            // The idf contribution is computed once per token per wrapper.
            testedTokens.clear();
            Handler handler = new Handler(){

                public void addToken(TToken newToken) {
                    if (newToken.getType() == TToken.Type.WHITESPACE) {
                        return;
                    }
                    String tokText = newToken.toString();
                    TfIdf tfidf = lookup.get(tokText);
                    if (tfidf == null) {
                        tfidf = new TfIdf();
                        tfidf.text = tokText;
                        lookup.put(tokText, tfidf);
                    }
                    ++tfidf.tf;
                    if (!testedTokens.add(tokText)) {
                        return;
                    }
                    List<TToken> tokens = builder.tokenize(tokText);
                    if (tokens.isEmpty()) {
                        return;
                    }
                    double docFreq = 0.0;
                    for (TToken token : tokens) {
                        String normalized = builder.normalize(token);
                        // Literals match REG_WEIGHT / SW_WEIGHT / SW_LEMMA_WEIGHT / LEMMA_WEIGHT.
                        double weight = 5.0;
                        if (TokenizationUtils.isSubwordForm(normalized)) {
                            weight = TokenizationUtils.isSubword(normalized) ? 2.0 : 1.0;
                        } else if (TokenizationUtils.isLemma(normalized)) {
                            weight = 3.0;
                        }
                        try {
                            docFreq += weight * (double)reader.docFreq(new Term("_plain", normalized));
                        }
                        catch (IOException ignored) {
                            // Best effort: a form whose frequency cannot be read contributes nothing.
                        }
                    }
                    if (docFreq != 0.0) {
                        docFreq /= (double)tokens.size();
                        double idf = similarity.idf((int)Math.ceil(docFreq), (int)(NORM_WEIGHT * (double)reader.maxDoc()));
                        tfidf.idf += idf * idf;
                    }
                }

                public void addAttributeValueToken(TToken token) {
                }

                public void addBaseTagToken(BaseTagToken tagToken) {
                }

                public int getMinPositionIncrement() {
                    return 1;
                }

                public int getPositionIncrement() {
                    return 1;
                }

                public int getSentencePositionIncrement() {
                    return 1;
                }

                public void setClosingPosition(int position) {
                }

                public void addField(String name, String value) {
                }

                public void addToken(TToken token, int begin, int end) {
                    this.addToken(token);
                }

                public void addXMLToken(XMLToken xmlToken) {
                }

                public void close() {
                }

                public void reset() {
                }
            };
            Tokenizer tokenizer = GlobalSystem.getSingleInstance().getTokenizerPool().aquire(wrapper.getCollectionId());
            try {
                tokenizer.process(handler, tpi);
            }
            finally {
                // Always hand the pooled tokenizer back, even when processing fails.
                if (tokenizer != null) {
                    GlobalSystem.getSingleInstance().getTokenizerPool().release(tokenizer);
                }
            }
        }
        for (TfIdf tfidf : lookup.values()) {
            // Negated so that the ascending sort below puts the highest score first.
            tfidf.value = (double)(-tfidf.tf) * tfidf.idf * tfidf.idf;
        }
        ArrayList<TfIdf> ordered = new ArrayList<TfIdf>(lookup.values());
        Collections.sort(ordered);
        // Clamp to [0, ordered.size()] so a zero or negative numTerms yields an empty array
        // instead of an out-of-bounds write or a negative array size.
        int length = Math.max(0, Math.min(numTerms, ordered.size()));
        String[] result = new String[length];
        for (int i = 0; i < length; ++i) {
            result[i] = ordered.get(i).text;
        }
        return result;
    }

    /**
     * Variant of {@code getInterestingWords} built on {@link #getTfIdfs(String, SearchWrapper[])}
     * that also removes terms sharing a stem.
     *
     * <p>Terms are scored as {@code -tf * idf} and visited in ascending score order. A term is
     * kept only when the Porter stem of its first lower-cased letter run has not been produced by
     * an earlier term; terms that yield no such token are skipped.
     *
     * @param text     text to analyze
     * @param wrappers search collections to consult
     * @param numTerms maximum number of terms to return
     * @return up to {@code numTerms} term texts in ranking order
     * @throws TokenizerException declared in the signature
     * @throws SiapiException     declared by {@code getTfIdfs}
     */
    public static String[] getInterestingWords2(String text, SearchWrapper[] wrappers, int numTerms) throws TokenizerException, SiapiException {
        Map<String, TfIdf> scores = SearchSelector.getTfIdfs(text, wrappers);
        for (TfIdf entry : scores.values()) {
            entry.value = (double)(-entry.tf) * entry.idf;
        }
        ArrayList<TfIdf> ranked = new ArrayList<TfIdf>(scores.values());
        Collections.sort(ranked);
        HashSet<String> seenStems = new HashSet<String>();
        ArrayList<String> picked = new ArrayList<String>();
        for (int rank = 0; rank < ranked.size() && picked.size() < numTerms; ++rank) {
            TfIdf candidate = ranked.get(rank);
            SingleStringReader source = new SingleStringReader();
            source.setStr(candidate.text);
            PorterStemFilter stemmer = new PorterStemFilter((TokenStream)new LowerCaseTokenizer((Reader)source));
            try {
                Token first = stemmer.next();
                // Keep the term only if it yields a token whose stem is new.
                if (first != null && seenStems.add(first.termText())) {
                    picked.add(candidate.text);
                }
            }
            catch (IOException e) {}
        }
        return picked.toArray(new String[picked.size()]);
    }

    /**
     * Computes term-frequency / inverse-document-frequency data for the terms of {@code text}.
     *
     * <p>The text is split on whitespace, {@code '-'} and {@code '"'}. Empty fragments, which
     * {@code split} produces for leading or adjacent delimiters, are skipped so they no longer
     * show up as a bogus {@code ""} entry. For every wrapper each remaining fragment is
     * normalized and its {@code tf} incremented. The first time a normalized term is seen for a
     * wrapper, its document frequency is taken as the weighted average over the forms returned
     * by the query builder, and the squared idf is added to the entry's {@code idf} field.
     *
     * @param text     text to analyze; must not be {@code null}
     * @param wrappers search collections to consult
     * @return map from normalized term to its accumulated {@link TfIdf}; {@code value} is not set
     * @throws SiapiException declared by the search-wrapper / query-builder calls
     */
    public static Map<String, TfIdf> getTfIdfs(String text, SearchWrapper[] wrappers) throws SiapiException {
        HashMap<String, TfIdf> lookup = new HashMap<String, TfIdf>();
        DefaultSimilarity similarity = new DefaultSimilarity();
        HashSet<String> testedTokens = new HashSet<String>();
        String[] tokenTexts = text.split("\\s|-|\"");
        for (SearchWrapper wrapper : wrappers) {
            IndexReader reader = wrapper.getTextIndexReader();
            QueryBuilder builder = new QueryBuilder(wrapper, null);
            // The idf contribution is computed once per term per wrapper.
            testedTokens.clear();
            for (String rawText : tokenTexts) {
                if (rawText.length() == 0) {
                    // Artifact of splitting on adjacent delimiters, not a real term.
                    continue;
                }
                String tokText = builder.normalize(rawText);
                TfIdf tfidf = lookup.get(tokText);
                if (tfidf == null) {
                    tfidf = new TfIdf();
                    tfidf.text = tokText;
                    lookup.put(tokText, tfidf);
                }
                ++tfidf.tf;
                if (!testedTokens.add(tokText)) {
                    continue;
                }
                List<TToken> tokens = builder.tokenize(tokText);
                if (tokens.isEmpty()) {
                    continue;
                }
                double docFreq = 0.0;
                for (TToken token : tokens) {
                    String nTokText = builder.normalize(token);
                    // Literals match REG_WEIGHT / SW_WEIGHT / SW_LEMMA_WEIGHT / LEMMA_WEIGHT.
                    double weight = 5.0;
                    if (TokenizationUtils.isSubwordForm(nTokText)) {
                        weight = TokenizationUtils.isSubword(nTokText) ? 2.0 : 1.0;
                    } else if (TokenizationUtils.isLemma(nTokText)) {
                        weight = 3.0;
                    }
                    try {
                        docFreq += weight * (double)reader.docFreq(new Term("_plain", nTokText));
                    }
                    catch (IOException ignored) {
                        // Best effort: a form whose frequency cannot be read contributes nothing.
                    }
                }
                if (docFreq == 0.0) {
                    continue;
                }
                docFreq /= (double)tokens.size();
                double idf = similarity.idf((int)Math.ceil(docFreq), (int)(NORM_WEIGHT * (double)reader.maxDoc()));
                tfidf.idf += idf * idf;
            }
        }
        return lookup;
    }

    /**
     * Combines the two tokenizer callback interfaces into a single type so that one anonymous
     * class (see getInterestingWords) can implement both. Declares no members of its own.
     */
    private static interface Handler
    extends NormalizedResultHandler,
    ResultHandler.PositionResultHandler {
    }

    /**
     * Minimal {@link Reader} over a single string that can be re-pointed at a new string with
     * {@link #setStr(String)}.
     */
    private static class SingleStringReader
    extends Reader {
        /** String currently being read; must be set before the first read. */
        String str;
        /** Index of the next character of {@code str} to hand out. */
        int pos;

        private SingleStringReader() {
        }

        /** Replaces the underlying string and rewinds to its start. */
        public void setStr(String str) {
            this.pos = 0;
            this.str = str;
        }

        /** Nothing to release. */
        @Override
        public void close() throws IOException {
        }

        /**
         * Copies up to {@code len} characters into {@code cbuf} starting at {@code off}.
         *
         * @return the number of characters copied, or {@code -1} once the string is exhausted
         */
        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
            final int remaining = this.str.length() - this.pos;
            if (remaining <= 0) {
                return -1;
            }
            final int count = Math.min(len, remaining);
            // The position advances only after each successful store.
            for (int copied = 0; copied < count; ++copied, ++this.pos) {
                cbuf[off + copied] = this.str.charAt(this.pos);
            }
            return count;
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Mutable per-term record holding term frequency, accumulated idf and the derived sort key.
     * Instances order by {@code value}, ascending.
     */
    public static class TfIdf
    implements Comparable<TfIdf> {
        /** Number of times the term was counted. */
        public int tf;
        /** Accumulated inverse-document-frequency contribution. */
        public double idf;
        /** Term text. */
        public String text;
        /** Sort key assigned by the ranking methods before sorting. */
        double value;

        /**
         * Orders by {@code value} using {@link Double#compare(double, double)}.
         */
        @Override
        public int compareTo(TfIdf o) {
            final double mine = this.value;
            final double theirs = o.value;
            return Double.compare(mine, theirs);
        }
    }
}

