/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.clearnlp;

import com.google.common.annotations.Beta;
import edu.emory.clir.clearnlp.component.AbstractComponent;
import edu.emory.clir.clearnlp.component.utils.GlobalLexica;
import edu.emory.clir.clearnlp.component.utils.NLPUtils;
import edu.emory.clir.clearnlp.dependency.DEPNode;
import edu.emory.clir.clearnlp.dependency.DEPTree;
import edu.emory.clir.clearnlp.util.lang.TLanguage;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.clearnlp.TokenOps;

@Beta
public abstract class PosTagger_ImplBase<TOKEN_TYPE extends Annotation>
extends JCasAnnotator_ImplBase {
    public static final String DEFAULT_MODEL_PATH = "general-en-pos.xz";
    public static final String PARAM_MODEL_PATH = "modelPath";
    @ConfigurationParameter(name="modelPath", mandatory=false, description="This parameter provides the path to the pos tagger model.", defaultValue={"general-en-pos.xz"})
    private String modelPath;
    public static final String PARAM_LANGUAGE_CODE = "languageCode";
    public static final String DEFAULT_LANGUAGE_CODE = TLanguage.ENGLISH.toString();
    @ConfigurationParameter(name="languageCode", mandatory=false, description="Language code for the pos tagger (default value=ENGLISH).", defaultValue={"ENGLISH"})
    private String languageCode;
    public static final String PARAM_WINDOW_CLASS = "windowClass";
    private static final String WINDOW_TYPE_DESCRIPTION = "specifies the class type of annotations that will be tokenized. By default, the tokenizer will tokenize a document sentence by sentence.  If you do not want to precede tokenization withsentence segmentation, then a reasonable value for this parameter is 'org.apache.uima.jcas.tcas.DocumentAnnotation'";
    @ConfigurationParameter(name="windowClass", mandatory=false, description="specifies the class type of annotations that will be tokenized. By default, the tokenizer will tokenize a document sentence by sentence.  If you do not want to precede tokenization withsentence segmentation, then a reasonable value for this parameter is 'org.apache.uima.jcas.tcas.DocumentAnnotation'", defaultValue={"org.cleartk.token.type.Sentence"})
    private Class<? extends Annotation> windowClass;
    private TokenOps<TOKEN_TYPE> tokenOps;
    private AbstractComponent tagger;

    public PosTagger_ImplBase(TokenOps<TOKEN_TYPE> tokenOps) {
        this.tokenOps = tokenOps;
    }

    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        try {
            ArrayList<String> paths = new ArrayList<String>();
            paths.add("brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt.xz");
            GlobalLexica.initDistributionalSemanticsWords(paths);
            this.tagger = NLPUtils.getPOSTagger((TLanguage)TLanguage.getType((String)this.languageCode), (String)this.modelPath);
        }
        catch (Exception e) {
            throw new ResourceInitializationException((Throwable)e);
        }
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        for (Annotation window : JCasUtil.select((JCas)jCas, this.windowClass)) {
            List<TOKEN_TYPE> tokens = this.tokenOps.selectTokens(jCas, window);
            if (tokens.size() <= 0) {
                return;
            }
            List tokenStrings = JCasUtil.toText(tokens);
            DEPTree clearNlpDepTree = new DEPTree(tokenStrings);
            this.tagger.process(clearNlpDepTree);
            for (int i = 0; i < tokens.size(); ++i) {
                Annotation token = (Annotation)tokens.get(i);
                DEPNode node = clearNlpDepTree.get(i + 1);
                this.tokenOps.setPos(jCas, token, node.getPOSTag());
            }
        }
    }
}

