package org.fryske_akademy.exist.lucene;

import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;

import java.io.IOException;
import java.io.Reader;

/**
 * Tokenizer for which ";" and "; " (a semicolon directly followed by one space) are token
 * separators, and therefore not token characters.
 * <p>
 * A semicolon is never part of a token. A space is part of a token unless the character
 * examined immediately before it was a semicolon; every other character is a token character.
 * <p>
 * Because the verdict for a space depends on the previously examined character, an instance
 * carries state between {@link #isTokenChar(int)} calls. That state is cleared in
 * {@link #reset()} so that a reused instance does not apply the tail of a previous input
 * to the start of the next one.
 */
public class NoSemiColonTokenizer extends CharTokenizer {

    /** Whether the character most recently passed to {@link #isTokenChar(int)} was ';'. */
    private boolean prevSemicolon = false;

    /**
     * @param input reader handed to the {@link CharTokenizer} constructor
     */
    public NoSemiColonTokenizer(Reader input) {
        super(input);
    }

    /**
     * @param factory attribute factory handed to the {@link CharTokenizer} constructor
     * @param input   reader handed to the {@link CharTokenizer} constructor
     */
    public NoSemiColonTokenizer(AttributeFactory factory, Reader input) {
        super(factory, input);
    }

    /**
     * Resets the tokenizer and forgets the previously examined character.
     * <p>
     * Without this, an input that ended in ';' would leave the flag set, and a leading space
     * in the next input processed by the same instance would be dropped as if it followed
     * a separator.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        prevSemicolon = false;
    }

    /**
     * Decides whether {@code c} belongs to a token.
     * <p>
     * NOTE(review): this relies on being invoked once per character, in input order.
     *
     * @param c the character (code point) to examine
     * @return {@code false} for ';' and for a space directly following ';',
     *         {@code true} otherwise
     */
    @Override
    protected boolean isTokenChar(int c) {
        boolean semicolon = c == ';';
        // A space is only a separator when it directly follows a semicolon.
        boolean tokenChar = c == ' ' ? !prevSemicolon : !semicolon;
        // Remember the current character for the next call.
        prevSemicolon = semicolon;
        return tokenChar;
    }
}
