/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.clearnlp;

import com.googlecode.clearnlp.engine.EngineGetter;
import com.googlecode.clearnlp.tokenization.AbstractTokenizer;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.util.JCasUtil;

public class Tokenizer
extends JCasAnnotator_ImplBase {
    public static final String DEFAULT_DICTIONARY_FILE_NAME = "dictionary-1.2.0.zip";
    public static final String PARAM_LANGUAGE_CODE = ConfigurationParameterFactory.createConfigurationParameterName(Tokenizer.class, (String)"languageCode");
    @ConfigurationParameter(description="Language code for the tokenizer (default value=en).", defaultValue={"en"})
    private String languageCode;
    public static final String PARAM_DICTIONARY_URI = ConfigurationParameterFactory.createConfigurationParameterName(Tokenizer.class, (String)"dictionaryUri");
    @ConfigurationParameter(description="This parameter provides the URI of the tokenizer dictionary file.")
    private URI dictionaryUri;
    private AbstractTokenizer tokenizer;

    public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(Tokenizer.class, (Object[])new Object[0]);
    }

    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        try {
            URL dictionaryURL = this.dictionaryUri == null ? Tokenizer.class.getResource(DEFAULT_DICTIONARY_FILE_NAME).toURI().toURL() : this.dictionaryUri.toURL();
            this.tokenizer = EngineGetter.getTokenizer((String)this.languageCode, (InputStream)dictionaryURL.openStream());
        }
        catch (Exception e) {
            throw new ResourceInitializationException((Throwable)e);
        }
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        for (Sentence sentence : JCasUtil.select((JCas)jCas, Sentence.class)) {
            String sentenceText = sentence.getCoveredText();
            int sentenceOffset = sentence.getBegin();
            List tokens = this.tokenizer.getTokens(sentence.getCoveredText());
            int offset = 0;
            for (String token : tokens) {
                int tokenBegin = sentenceText.indexOf(token, offset);
                int tokenEnd = tokenBegin + token.length();
                Token cleartkToken = new Token(jCas, sentenceOffset + tokenBegin, sentenceOffset + tokenEnd);
                cleartkToken.addToIndexes();
                offset = tokenEnd;
            }
        }
    }
}

