/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.language.extras;

import org.apache.lucene.analysis.ar.ArabicNormalizer;
import org.apache.lucene.analysis.ar.ArabicStemmer;
import org.carrot2.language.ExtendedWhitespaceTokenizer;
import org.carrot2.language.SingleLanguageComponentsProviderImpl;
import org.carrot2.language.Stemmer;
import org.carrot2.language.Tokenizer;
import org.carrot2.language.extras.LuceneStemmerAdapter;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.LabelFormatterImpl;

/**
 * Language components provider for Arabic.
 *
 * <p>On construction it registers a tokenizer, a label formatter, the default
 * lexical data and a stemmer built from Apache Lucene's Arabic normalizer and
 * stemmer.
 */
public class ArabicLanguageComponents
extends SingleLanguageComponentsProviderImpl {
    /** Language name passed to the superclass constructor. */
    public static final String NAME = "Arabic";

    /**
     * Creates the provider and registers all Arabic components.
     */
    public ArabicLanguageComponents() {
        super("Carrot2 (Arabic support via Apache Lucene components)", NAME);

        registerResourceless(Tokenizer.class, ExtendedWhitespaceTokenizer::new);
        // NOTE(review): the single-space argument is presumably the separator
        // placed between label words -- confirm against LabelFormatterImpl.
        registerResourceless(LabelFormatter.class, () -> new LabelFormatterImpl(" "));
        registerDefaultLexicalData();
        registerResourceless(Stemmer.class, ArabicLanguageComponents::createStemmer);
    }

    /**
     * Builds a new stemmer backed by its own Lucene normalizer and stemmer
     * instances; every call creates a fresh pair.
     *
     * @return an adapter that normalizes the word buffer and then stems it,
     *         returning the length reported by the stemming step
     */
    private static Stemmer createStemmer() {
        ArabicNormalizer arabicNormalizer = new ArabicNormalizer();
        ArabicStemmer arabicStemmer = new ArabicStemmer();
        // The length returned by normalization is fed into the stemming step.
        return new LuceneStemmerAdapter((buffer, length) ->
                arabicStemmer.stem(buffer, arabicNormalizer.normalize(buffer, length)));
    }
}

