/*
 * Decompiled with CFR 0.152.
 */
package org.molgenis.data.semanticsearch.string;

import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.tartarus.snowball.ext.PorterStemmer;

/**
 * Static helpers that normalise free text and reduce words to their stems using the Snowball
 * {@link PorterStemmer}.
 */
public class Stemmer {
    private static final String ILLEGAL_REGEX_PATTERN = "[^a-zA-Z0-9 ]";

    /** Compiled once; matches every character that is not an ASCII letter, an ASCII digit or a space. */
    private static final Pattern ILLEGAL_CHARACTERS = Pattern.compile(ILLEGAL_REGEX_PATTERN);

    /** Compiled once; matches a run of one or more spaces. */
    private static final Pattern MULTIPLE_SPACES = Pattern.compile(" +");

    /**
     * Cleans a phrase with {@link #replaceIllegalCharacter(String)}, stems each space-separated
     * word and joins the non-empty stems with a single space.
     *
     * @param phrase the phrase to clean and stem; must not be {@code null}
     * @return the stemmed words separated by single spaces, or an empty string when no word
     *     survives cleaning
     */
    public static String cleanStemPhrase(String phrase) {
        StringBuilder stringBuilder = new StringBuilder();
        for (String word : Stemmer.replaceIllegalCharacter(phrase).split(" ")) {
            String stemmedWord = Stemmer.stem(word);
            // An empty cleaned phrase splits into a single empty word; skip it.
            if (StringUtils.isEmpty(stemmedWord)) {
                continue;
            }
            if (stringBuilder.length() > 0) {
                stringBuilder.append(' ');
            }
            stringBuilder.append(stemmedWord);
        }
        return stringBuilder.toString();
    }

    /**
     * Stems a single word with a fresh {@link PorterStemmer}. The word is handed to the stemmer
     * as is; no cleaning or lower-casing is applied here.
     *
     * @param word the word to stem
     * @return the stemmer's result for {@code word}
     */
    public static String stem(String word) {
        PorterStemmer porterStemmer = new PorterStemmer();
        porterStemmer.setCurrent(word);
        porterStemmer.stem();
        return porterStemmer.getCurrent();
    }

    /**
     * Stems every term with {@link #stem(String)} and joins the results with a single space, in
     * the iteration order of the set. Terms are not cleaned first and empty stems are kept.
     *
     * @param terms the terms to stem; must not be {@code null}
     * @return the stemmed terms separated by single spaces
     */
    public static String stemAndJoin(Set<String> terms) {
        return terms.stream().map(Stemmer::stem).collect(Collectors.joining(" "));
    }

    /**
     * Replaces every character that is not an ASCII letter, an ASCII digit or a space with a
     * space, collapses runs of spaces into one, trims the result and lower-cases it.
     *
     * @param string the text to normalise; must not be {@code null}
     * @return the normalised, lower-cased text
     */
    public static String replaceIllegalCharacter(String string) {
        String withoutIllegalCharacters = ILLEGAL_CHARACTERS.matcher(string).replaceAll(" ");
        String singleSpaced = MULTIPLE_SPACES.matcher(withoutIllegalCharacters).replaceAll(" ");
        // Locale.ROOT keeps the result independent of the JVM default locale: under a Turkish
        // locale the default toLowerCase() maps 'I' to dotless 'ı', which is not an ASCII letter.
        return singleSpaced.trim().toLowerCase(Locale.ROOT);
    }
}

