/*
 * Decompiled with CFR 0.152.
 */
package tratz.runpipe.impl.annotators.tokenize;

import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import tratz.runpipe.Annotation;
import tratz.runpipe.InitializationException;
import tratz.runpipe.ProcessException;
import tratz.runpipe.TextDocument;
import tratz.runpipe.annotations.Sentence;
import tratz.runpipe.annotations.Token;
import tratz.runpipe.impl.AnnotatorImpl;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Annotator that splits the text of every {@link Sentence} annotation on runs
 * of whitespace and adds one {@link Token} annotation per resulting piece.
 */
public class WhitespaceTokenizer
extends AnnotatorImpl {
    /** Matches one or more whitespace characters; compiled once because it never changes. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * No-op: this annotator reads no configuration.
     *
     * @param args initialization arguments; ignored
     * @throws InitializationException never thrown by this implementation
     */
    @Override
    public void initialize(Map<String, String> args) throws InitializationException {
    }

    /**
     * Adds a {@link Token} annotation to {@code doc} for each whitespace-delimited
     * piece of each {@link Sentence} annotation's text. Does nothing when the
     * document returns {@code null} for its sentence list.
     *
     * <p>Token start/end values are the piece's position inside the sentence text
     * shifted by {@code sentence.getStart()}.
     *
     * @param doc the document whose sentences are tokenized; receives the new tokens
     * @throws ProcessException declared by the overridden method; not thrown directly here
     */
    @Override
    public void process(TextDocument doc) throws ProcessException {
        List<? extends Annotation> allSentences = doc.getAnnotationList(Sentence.class);
        if (allSentences == null) {
            return;
        }
        for (Annotation annotation : allSentences) {
            // The list is typed as Annotation; it was requested for Sentence.class,
            // so each element is cast back to Sentence explicitly.
            Sentence sentence = (Sentence) annotation;
            String text = sentence.getAnnotText();
            int sentenceStart = sentence.getStart();
            // Position in the sentence text just past the previously located piece.
            int cursor = 0;
            for (String token : WHITESPACE.split(text)) {
                // Searching from the cursor keeps repeated words from matching an
                // earlier occurrence of the same string.
                int tokenStart = text.indexOf(token, cursor);
                int tokenEnd = tokenStart + token.length();
                // split() yields a leading empty string when the text starts with
                // whitespace; such pieces (and any that trim to nothing) get no token.
                if (!token.trim().isEmpty()) {
                    doc.addAnnotation(new Token(doc, sentenceStart + tokenStart, sentenceStart + tokenEnd));
                }
                cursor = tokenEnd;
            }
        }
    }
}

