/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.language.chinese;

import java.io.IOException;
import java.io.Reader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.cn.smart.HMMChineseTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.carrot2.language.SingleLanguageComponentsProviderImpl;
import org.carrot2.language.Stemmer;
import org.carrot2.language.Tokenizer;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.LabelFormatterImpl;
import org.carrot2.util.MutableCharArray;

/**
 * Language components provider for Simplified Chinese.
 *
 * <p>Registers a stemmer, a tokenizer backed by Lucene's {@link HMMChineseTokenizer},
 * a label formatter and the default lexical data under the language name {@link #NAME}.
 */
public class SimplifiedChineseLanguageComponents
extends SingleLanguageComponentsProviderImpl {
    /** Language name passed to the superclass constructor. */
    public static final String NAME = "Chinese-Simplified";

    /**
     * Registers all components for this language. None of the components registered
     * through {@code registerResourceless} take external resources.
     */
    public SimplifiedChineseLanguageComponents() {
        super("Carrot2 (Simplified Chinese via Apache Lucene components)", NAME);
        // The stemmer returns null for every word.
        // NOTE(review): presumably null means "no stem available" to the caller -- confirm against Stemmer's contract.
        this.registerResourceless(Stemmer.class, () -> word -> null);
        this.registerResourceless(Tokenizer.class, ChineseTokenizerAdapter::new);
        // Label formatter is constructed with an empty string argument.
        // NOTE(review): presumably this is the joiner placed between label words -- confirm.
        this.registerResourceless(LabelFormatter.class, () -> new LabelFormatterImpl(""));
        this.registerDefaultLexicalData();
    }

    /**
     * Adapts Lucene's {@link HMMChineseTokenizer} to the {@link Tokenizer} interface.
     *
     * <p>{@link #reset(Reader)} must be called before {@link #nextToken()} or
     * {@link #setTermBuffer(MutableCharArray)}: the underlying tokenizer and its term
     * attribute are created lazily on the first reset and are {@code null} until then.
     */
    private static final class ChineseTokenizerAdapter
    implements Tokenizer {
        /**
         * Matches number-like tokens: an optional leading {@code - + ' $}, one or more digits,
         * any number of further digit runs each optionally preceded by one of {@code : - / , .},
         * and an optional trailing {@code %} or {@code $}.
         */
        private static final Pattern numeric = Pattern.compile("[\\-+'$]?\\d+([:\\-/,.]?\\d+)*[%$]?");

        /** Underlying Lucene tokenizer; created on the first {@link #reset(Reader)} and reused afterwards. */
        private HMMChineseTokenizer tokenizer;

        /** Term attribute of {@link #tokenizer}; exposes the current token's characters. */
        private CharTermAttribute term;

        /** Reusable view over the current token so that matching against {@link #numeric} needs no copy. */
        private final MutableCharArray tempCharSequence = new MutableCharArray();

        private ChineseTokenizerAdapter() {
        }

        /**
         * Advances to the next token and classifies it.
         *
         * @return {@code 3} if the token is the single character {@code ','}, {@code 2} if the
         *     whole token matches {@link #numeric}, {@code 1} for any other token, and
         *     {@code -1} when the underlying tokenizer has no more tokens. NOTE(review): these
         *     literals presumably correspond to the token-type constants declared on
         *     {@link Tokenizer} (inlined by the compiler) -- confirm.
         * @throws IOException if the underlying tokenizer fails to read its input
         */
        public short nextToken() throws IOException {
            if (!this.tokenizer.incrementToken()) {
                return -1;
            }

            char[] image = this.term.buffer();
            int length = this.term.length();
            this.tempCharSequence.reset(image, 0, length);

            if (length == 1 && image[0] == ',') {
                return 3;
            }
            if (numeric.matcher(this.tempCharSequence).matches()) {
                return 2;
            }
            return 1;
        }

        /**
         * Points {@code array} at the characters of the current token. The array is reset to
         * reference the term attribute's own buffer, not a copy of it.
         *
         * @param array the array to reset to the current token's buffer and length
         */
        public void setTermBuffer(MutableCharArray array) {
            array.reset(this.term.buffer(), 0, this.term.length());
        }

        /**
         * Prepares this adapter to tokenize {@code input}.
         *
         * <p>On the first call the Lucene tokenizer and its term attribute are created. On later
         * calls the existing tokenizer is ended and closed before it is given the new reader.
         *
         * @param input the reader to tokenize
         * @throws IOException if ending, closing or resetting the underlying tokenizer fails
         */
        public void reset(Reader input) throws IOException {
            if (this.tokenizer == null) {
                this.tokenizer = new HMMChineseTokenizer();
                this.term = this.tokenizer.addAttribute(CharTermAttribute.class);
            } else {
                // Always close the previous stream, even if end() fails; otherwise the old
                // reader is never released and the tokenizer is left unclosed for reuse.
                try {
                    this.tokenizer.end();
                } finally {
                    this.tokenizer.close();
                }
            }
            this.tokenizer.setReader(input);
            this.tokenizer.reset();
        }
    }
}

