/*
 * Decompiled with CFR 0.152.
 */
package sklearn2pmml.feature_extraction.text;

import com.google.common.base.Joiner;
import java.util.List;
import org.dmg.pmml.TextIndex;
import sklearn.feature_extraction.text.Tokenizer;

/**
 * Tokenizer whose word boundaries are described by a regular expression kept
 * under the {@code word_separator_re} attribute.
 */
public class Splitter
extends Tokenizer {
    /**
     * Creates a splitter registered under the
     * {@code sklearn2pmml.feature_extraction.text} module with the name
     * {@code Splitter}.
     */
    public Splitter() {
        this("sklearn2pmml.feature_extraction.text", "Splitter");
    }

    /**
     * Creates a splitter, forwarding both identifiers to the superclass constructor.
     *
     * @param module module identifier passed to the superclass
     * @param name class identifier passed to the superclass
     */
    public Splitter(String module, String name) {
        super(module, name);
    }

    /**
     * Turns tokenization on for the given text index and sets its word
     * separator character regex to this splitter's separator.
     *
     * @param textIndex the text index to configure
     * @return the value returned by the final setter call on the text index
     */
    @Override
    public TextIndex configure(TextIndex textIndex) {
        // Read the separator before touching textIndex, so a failed lookup leaves it unmodified.
        String separatorPattern = getWordSeparatorRE();
        TextIndex tokenizing = textIndex.setTokenize(Boolean.TRUE);
        return tokenizing.setWordSeparatorCharacterRE(separatorPattern);
    }

    /**
     * Builds a regular expression matching any of the given stop words,
     * optionally wrapped in punctuation, and delimited on each side by either
     * the word separator or the start/end of the input.
     *
     * <p>The stop words are inserted into the pattern verbatim, joined with
     * {@code |}; no escaping is applied here.
     *
     * @param stopWords the stop words to alternate between
     * @return the assembled regular expression
     */
    @Override
    public String formatStopWordsRE(List<String> stopWords) {
        String separatorPattern = getWordSeparatorRE();
        String alternatives = Joiner.on("|").join(stopWords);

        StringBuilder pattern = new StringBuilder();
        // Left boundary: start of input or a separator, then optional punctuation.
        pattern.append("(^|").append(separatorPattern).append(")\\p{Punct}*(");
        pattern.append(alternatives);
        // Right boundary: optional punctuation, then a separator or end of input.
        pattern.append(")\\p{Punct}*(").append(separatorPattern).append("|$)");
        return pattern.toString();
    }

    /**
     * Initializes this splitter's state from a word separator regex by
     * delegating to {@link #setWordSeparatorRE(String)}.
     *
     * <p>NOTE(review): the name suggests this mirrors the Python pickle
     * {@code __setstate__} hook - confirm against the unpickling caller.
     *
     * @param wordSeparatorRE the word separator regex to store
     */
    public void __setstate__(String wordSeparatorRE) {
        setWordSeparatorRE(wordSeparatorRE);
    }

    /**
     * Returns the string stored under the {@code word_separator_re} attribute.
     *
     * @return the word separator regex
     */
    public String getWordSeparatorRE() {
        return getString("word_separator_re");
    }

    /**
     * Stores the given regex under the {@code word_separator_re} attribute.
     *
     * @param wordSeparatorRE the word separator regex to store
     * @return this splitter, to allow call chaining
     */
    public Splitter setWordSeparatorRE(String wordSeparatorRE) {
        put("word_separator_re", wordSeparatorRE);
        return this;
    }
}

