/*
 * Decompiled with CFR 0.152.
 */
package com.s24.search.solr.analyzers;

import com.google.common.annotations.VisibleForTesting;
import com.s24.search.solr.analyzers.AnalyzingSentenceTokenizer;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AnalyzingSentenceTokenizerFactory
extends TokenizerFactory
implements ResourceLoaderAware {
    private static final Logger logger = LoggerFactory.getLogger(AnalyzingSentenceTokenizerFactory.class);
    private static final String FILTER_ARG = "filter";
    private boolean filter = false;
    private static final String STOP_WORD_FILE = "stopwordfile";
    private String stopWordFilePath;
    private CharArraySet stopWords = null;
    @VisibleForTesting
    static final float DEFAULT_COMMA_WORD_THRESHOLD = 0.2f;
    private static final String COMMA_WORD_THRESHOLD_ARG = "commaWordThreshold";
    private float commaWordThreshold = 0.2f;
    static final float DEFAULT_MAX_STOPWORD_RATIO = 0.21f;
    private static final String MAX_STOPWORD_RATIO_ARG = "maxStopwordRatio";
    private float maxStopwordRatio = 0.21f;
    static final int DEFAULT_MIN_SENTENCE_LENGTH = 5;
    private static final String MIN_SENTENCE_LENGTH_ARG = "minSentenceLength";
    private int minSentenceLength = 5;

    public AnalyzingSentenceTokenizerFactory(Map<String, String> args) {
        super(args);
        if (args.containsKey(FILTER_ARG)) {
            this.filter = Boolean.parseBoolean(args.get(FILTER_ARG));
        }
        if (args.containsKey(COMMA_WORD_THRESHOLD_ARG)) {
            this.commaWordThreshold = Float.parseFloat(args.get(COMMA_WORD_THRESHOLD_ARG));
        }
        if (args.containsKey(MAX_STOPWORD_RATIO_ARG)) {
            this.maxStopwordRatio = Float.parseFloat(args.get(MAX_STOPWORD_RATIO_ARG));
        }
        if (args.containsKey(MIN_SENTENCE_LENGTH_ARG)) {
            this.minSentenceLength = Integer.parseInt(args.get(MIN_SENTENCE_LENGTH_ARG));
        }
        if (args.containsKey(STOP_WORD_FILE)) {
            this.stopWordFilePath = args.get(STOP_WORD_FILE);
        } else {
            logger.warn("The {} param is not set. The sentences could not be analyzed (due to wrong calcuation of the information gain).", (Object)STOP_WORD_FILE);
        }
    }

    public void inform(ResourceLoader loader) throws IOException {
        if (this.stopWordFilePath != null) {
            try {
                this.stopWords = this.getWordSet(loader, this.stopWordFilePath, true);
            }
            catch (IOException e) {
                throw new RuntimeException(e);
            }
        } else {
            this.stopWords = new CharArraySet(0, false);
        }
    }

    public Tokenizer create(AttributeFactory factory) {
        return new AnalyzingSentenceTokenizer(factory, this.filter, this.stopWords, this.commaWordThreshold, this.maxStopwordRatio, this.minSentenceLength);
    }
}

