/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.timeml.corpus;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
import org.cleartk.syntax.constituent.type.TerminalTreebankNode;
import org.cleartk.syntax.constituent.util.TopTreebankNode;
import org.cleartk.syntax.constituent.util.TreebankFormatParser;
import org.cleartk.syntax.constituent.util.TreebankNodeUtility;
import org.cleartk.timeml.type.Text;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.cleartk.util.ViewURIUtil;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.util.JCasUtil;

public class TreebankAligningAnnotator
extends JCasAnnotator_ImplBase {
    public static final String PARAM_TREEBANK_DIRECTORY_NAME = ConfigurationParameterFactory.createConfigurationParameterName(TreebankAligningAnnotator.class, (String)"treebankDirectoryName");
    @ConfigurationParameter(mandatory=true, description="the path to the treebank directory containing the XX/wsj_XXXX.mrg files.")
    private String treebankDirectoryName;
    private File treebankDirectory;

    public void setTreebankDirectoryName(String treebankDirectoryName) {
        this.treebankDirectoryName = treebankDirectoryName;
    }

    public static AnalysisEngineDescription getDescription(String treeBankDir) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(TreebankAligningAnnotator.class, (Object[])new Object[]{PARAM_TREEBANK_DIRECTORY_NAME, treeBankDir});
    }

    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        this.treebankDirectory = new File(this.treebankDirectoryName);
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        List utilTrees;
        String mrgText;
        String wsjPath = ViewURIUtil.getURI((JCas)jCas).getPath();
        String wsjName = new File(wsjPath).getName();
        String subdir = wsjName.substring(4, 6);
        String mrgName = wsjName.replaceAll("\\.tml", ".mrg");
        File mrgFile = new File(new File(this.treebankDirectory, subdir), mrgName);
        try {
            mrgText = FileUtils.file2String((File)mrgFile);
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
        Collection texts = JCasUtil.select((JCas)jCas, Text.class);
        if (texts.size() != 1) {
            throw CleartkExtractorException.wrongNumberOfAnnotations(Text.class, (int)1, (int)texts.size());
        }
        int offset = ((Text)texts.iterator().next()).getBegin();
        String text = jCas.getDocumentText();
        try {
            utilTrees = TreebankFormatParser.parseDocument((String)mrgText, (int)offset, (String)text);
        }
        catch (Exception e) {
            this.getContext().getLogger().log(Level.WARNING, String.format("Skipping %s due to alignment problems", wsjPath), (Throwable)e);
            return;
        }
        for (TopTreebankNode utilTree : utilTrees) {
            org.cleartk.syntax.constituent.type.TopTreebankNode tree = TreebankNodeUtility.convert((TopTreebankNode)utilTree, (JCas)jCas, (boolean)true);
            Sentence sentence = new Sentence(jCas, tree.getBegin(), tree.getEnd());
            sentence.addToIndexes();
            for (int i = 0; i < tree.getTerminals().size(); ++i) {
                TerminalTreebankNode leaf = tree.getTerminals(i);
                if (leaf.getBegin() == leaf.getEnd()) continue;
                Token token = new Token(jCas, leaf.getBegin(), leaf.getEnd());
                token.setPos(leaf.getNodeType());
                token.addToIndexes();
            }
        }
    }
}

