/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.dev.dumpcheck;

import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.languagetool.Language;
import org.languagetool.dev.dumpcheck.Sentence;
import org.languagetool.tokenizers.Tokenizer;

/**
 * Base class for iterators that produce {@link Sentence} objects.
 *
 * <p>Subclasses implement {@link #hasNext()}, {@link #next()} and {@link #getSource()};
 * this class contributes the {@link #acceptSentence(String)} filter, which subclasses
 * can call to decide whether a candidate sentence should be handed out.
 * {@link #remove()} is not supported.
 */
public abstract class SentenceSource
implements Iterator<Sentence> {
    /** Minimum length (as reported by {@link String#length()}) of the trimmed sentence. */
    private static final int MIN_SENTENCE_SIZE = 10;
    /** Minimum number of non-whitespace tokens a sentence must contain. */
    private static final int MIN_SENTENCE_TOKEN_COUNT = 4;
    /** Maximum length (as reported by {@link String#length()}) of the trimmed sentence. */
    private static final int MAX_SENTENCE_LENGTH = 300;

    /** Word tokenizer obtained from the language passed to the constructor. */
    private final Tokenizer wordTokenizer;
    /** Optional pre-filter; {@code null} means no pattern filtering is applied. */
    private final Pattern acceptPattern;

    /**
     * Creates a source without a pattern filter.
     *
     * @param language the language whose word tokenizer is used for counting tokens
     */
    SentenceSource(Language language) {
        this(language, null);
    }

    /**
     * Creates a source with an optional pattern filter.
     *
     * @param language the language whose word tokenizer is used for counting tokens
     * @param acceptPattern if not {@code null}, only sentences in which this pattern
     *        is found somewhere are accepted by {@link #acceptSentence(String)}
     */
    SentenceSource(Language language, Pattern acceptPattern) {
        this.wordTokenizer = language.getWordTokenizer();
        this.acceptPattern = acceptPattern;
    }

    @Override
    public abstract boolean hasNext();

    @Override
    public abstract Sentence next();

    /**
     * Returns a string identifying this source; it is used as the prefix of
     * {@link #toString()}.
     *
     * @return the source identifier
     */
    public abstract String getSource();

    /**
     * Always fails: removing elements is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("remove not supported");
    }

    /**
     * Returns {@link #getSource()}, a dash, and the inherited {@code toString()} value.
     */
    @Override
    public String toString() {
        return this.getSource() + "-" + super.toString();
    }

    /**
     * Decides whether the given sentence passes this source's filters.
     *
     * <p>The sentence is rejected if an accept pattern is set and is not found in the
     * (untrimmed) sentence. Otherwise the sentence is trimmed and accepted only if its
     * length lies within {@code [MIN_SENTENCE_SIZE, MAX_SENTENCE_LENGTH]} and it has at
     * least {@code MIN_SENTENCE_TOKEN_COUNT} non-whitespace tokens.
     *
     * @param sentence the candidate sentence; must not be {@code null}
     * @return {@code true} if the sentence passes all checks
     */
    protected boolean acceptSentence(String sentence) {
        // Cheap rejection first: skip trimming and tokenizing when the pattern cannot match.
        if (this.acceptPattern != null && !this.acceptPattern.matcher(sentence).find()) {
            return false;
        }
        String trimSentence = sentence.trim();
        int length = trimSentence.length();
        if (length < MIN_SENTENCE_SIZE || length > MAX_SENTENCE_LENGTH) {
            return false;
        }
        // Tokenization is only reached for sentences that already passed the length check.
        return this.countTokens(trimSentence) >= MIN_SENTENCE_TOKEN_COUNT;
    }

    /**
     * Counts the tokens of the sentence that are not empty after trimming.
     *
     * @param sentence the text to tokenize with the word tokenizer
     * @return the number of tokens that contain something other than whitespace
     */
    private int countTokens(String sentence) {
        int realTokens = 0;
        // Typed list: iterating a raw List with a String loop variable does not compile.
        List<String> allTokens = this.wordTokenizer.tokenize(sentence);
        for (String token : allTokens) {
            if (!token.trim().isEmpty()) {
                ++realTokens;
            }
        }
        return realTokens;
    }
}

