/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.analyzer;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.util.Span;
import org.cogroo.analyzer.Analyzer;
import org.cogroo.text.Document;
import org.cogroo.text.Sentence;
import org.cogroo.text.Token;
import org.cogroo.text.impl.TokenImpl;

/**
 * {@link Analyzer} that splits each sentence of a {@link Document} into
 * tokens using an OpenNLP {@link TokenizerME}.
 *
 * <p>Every call into the wrapped tokenizer is made while holding the
 * tokenizer's own monitor, so instances of this class that share one
 * {@code TokenizerME} never invoke it concurrently.
 * NOTE(review): presumably needed because {@code TokenizerME} keeps
 * per-call state and is not thread-safe - confirm against the OpenNLP
 * version in use.
 */
public class Tokenizer implements Analyzer {
    /** Typographic opening quotes: U+00AB (left angle quote) and U+201C (left curly quote). */
    private static final Pattern OPEN_QUOTATION = Pattern.compile("[\u00ab\u201c]");
    /** Typographic closing quotes: U+00BB (right angle quote) and U+201D (right curly quote). */
    private static final Pattern CLOSE_QUOTATION = Pattern.compile("[\u00bb\u201d]");

    /**
     * Wrapped OpenNLP tokenizer. It doubles as the lock monitor in
     * {@link #analyze(Document)}, so it must never be reassigned.
     */
    private final TokenizerME tokenizer;

    /**
     * Creates an analyzer backed by the given OpenNLP tokenizer.
     *
     * @param tokenizer the tokenizer used to find token boundaries; it is
     *                  also used as the lock guarding every tokenization call
     */
    public Tokenizer(TokenizerME tokenizer) {
        this.tokenizer = tokenizer;
    }

    /**
     * Tokenizes every sentence of the document and stores the resulting
     * token list on each sentence via {@link Sentence#setTokens}.
     *
     * <p>Token boundaries are computed on a preprocessed copy of the sentence
     * text (see {@link #preprocess(String)}), but the token text itself is
     * taken from the original, unmodified sentence text.
     *
     * @param document the document whose sentences are tokenized in place
     */
    @Override
    public void analyze(Document document) {
        List<Sentence> sentences = document.getSentences();
        for (Sentence sentence : sentences) {
            String sentenceString = sentence.getText();

            Span[] tokensSpan;
            // Serialize access to the shared tokenizer instance.
            synchronized (this.tokenizer) {
                tokensSpan = this.tokenizer.tokenizePos(this.preprocess(sentenceString));
            }

            List<Token> tokens = new ArrayList<Token>(tokensSpan.length);
            for (Span span : tokensSpan) {
                // Preprocessing swaps single characters one-for-one, so the
                // spans found on the preprocessed text are valid offsets
                // into the original sentence text as well.
                String coveredText = span.getCoveredText(sentenceString).toString();
                tokens.add(new TokenImpl(span.getStart(), span.getEnd(), coveredText));
            }
            sentence.setTokens(tokens);
        }
    }

    /**
     * Replaces typographic opening and closing double quotes with the plain
     * ASCII double quote before the text is handed to the tokenizer.
     *
     * <p>Each replacement substitutes exactly one character for exactly one
     * character, so the returned string has the same length as the input and
     * all character offsets are preserved.
     *
     * @param sentenceString the original sentence text
     * @return the sentence text with typographic double quotes normalized to {@code "}
     */
    private String preprocess(String sentenceString) {
        sentenceString = OPEN_QUOTATION.matcher(sentenceString).replaceAll("\"");
        sentenceString = CLOSE_QUOTATION.matcher(sentenceString).replaceAll("\"");
        return sentenceString;
    }
}

