/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.timeml.tlink;

import java.io.File;
import java.io.IOException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.factory.UimaContextFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.cleartk.corpus.timeml.PlainTextTlinkGoldAnnotator;
import org.cleartk.corpus.timeml.TimeMlGoldAnnotator;
import org.cleartk.corpus.timeml.TreebankAligningAnnotator;
import org.cleartk.ml.jar.JarClassifierBuilder;
import org.cleartk.snowball.DefaultSnowballStemmer;
import org.cleartk.timeml.tlink.VerbClauseTemporalAnnotator;
import org.cleartk.util.cr.FilesCollectionReader;

public class VerbClauseTemporalTrain {
    private static void error(String message) throws Exception {
        Logger logger = UimaContextFactory.createUimaContext((Object[])new Object[0]).getLogger();
        logger.log(Level.SEVERE, String.format("%s\nusage: VerbClauseTemporalMain timebank-dir treebank-dir", message));
        System.exit(1);
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            VerbClauseTemporalTrain.error("wrong number of arguments");
        } else if (!new File(args[0]).exists()) {
            VerbClauseTemporalTrain.error("TimeBank directory not found: " + args[0]);
        } else if (!new File(args[1]).exists()) {
            VerbClauseTemporalTrain.error("TreeBank directory not found: " + args[1]);
        }
        String timeBankDir = args[0];
        String treeBankDir = args[1];
        File cleanedTimeBankDir = VerbClauseTemporalTrain.getCleanedTimeBankDir(timeBankDir);
        timeBankDir = cleanedTimeBankDir.getPath();
        SimplePipeline.runPipeline((CollectionReader)FilesCollectionReader.getCollectionReaderWithPatterns((String)timeBankDir, (String)"TimeMLView", (String[])new String[]{"wsj_.*[.]tml"}), (AnalysisEngineDescription[])new AnalysisEngineDescription[]{TimeMlGoldAnnotator.getDescriptionNoTLINKs(), PlainTextTlinkGoldAnnotator.getDescription(), TreebankAligningAnnotator.getDescription((String)treeBankDir), DefaultSnowballStemmer.getDescription((String)"English"), VerbClauseTemporalAnnotator.FACTORY.getWriterDescription()});
        FileUtils.deleteRecursive((File)cleanedTimeBankDir);
        File trainingDirectory = VerbClauseTemporalAnnotator.FACTORY.getTrainingDirectory();
        JarClassifierBuilder.trainAndPackage((File)trainingDirectory, (String[])new String[0]);
        for (File file : trainingDirectory.listFiles()) {
            File modelFile = JarClassifierBuilder.getModelJarFile((File)trainingDirectory);
            if (file.isDirectory() || file.equals(modelFile)) continue;
            file.delete();
        }
    }

    public static File getCleanedTimeBankDir(String timeBankDir) throws IOException {
        File tempDir = File.createTempFile("TimeBank", "Cleaned");
        tempDir.delete();
        tempDir.mkdir();
        for (File file : new File(timeBankDir).listFiles()) {
            String name = file.getName();
            if (file.isHidden() || name.startsWith(".")) continue;
            String text = FileUtils.file2String((File)file);
            text = text.replaceAll("\\bamp\\b", "&amp;");
            text = text.replaceAll("SampP", "S&amp;P");
            text = text.replaceAll("&&amp;;", "&amp;");
            text = text.replaceAll("---", "");
            text = VerbClauseTemporalTrain.fixTextByFileName(name, text);
            FileUtils.saveString2File((String)text, (File)new File(tempDir, file.getName()));
        }
        return tempDir;
    }

    public static String fixTextByFileName(String name, String text) {
        if (name.equals("wsj_0032.tml")) {
            text = text.replace("the <TIMEX3 tid=\"t18\"", "<TIMEX3 tid=\"t18\"");
        } else if (name.equals("wsj_0159.tml")) {
            text = text.replace("Acquisition has <EVENT eid=\"e11\"", "DD Acquisition has <EVENT eid=\"e11\"");
            text = text.replace("Acquisition <EVENT eid=\"e20\"", "DD Acquisition <EVENT eid=\"e20\"");
        } else if (name.equals("wsj_0266.tml")) {
            text = text.replace("BRUCE R. BENT", "");
        } else if (name.equals("wsj_0344.tml")) {
            text = text.replace(" 30</TIMEX3>.", "</TIMEX3>");
        } else if (name.equals("wsj_0376.tml")) {
            text = text.replace("roughly off", "off roughly");
        } else if (name.equals("wsj_0586.tml")) {
            text = text.replaceAll("(?m)@((?!</HL>).)*?$", "");
        } else if (name.equals("wsj_0612.tml")) {
            text = text.replace("@ <ENAMEX TYPE=\"ORGANIZATION\">CORPORATES", "<ENAMEX TYPE=\"ORGANIZATION\">");
            text = text.replace("@ <ENAMEX TYPE=\"ORGANIZATION\">EUROBONDS", "<ENAMEX TYPE=\"ORGANIZATION\">");
        } else if (name.equals("wsj_0667.tml")) {
            text = text.replace("1988</TIMEX3>.", "</TIMEX3>");
        } else if (name.equals("wsj_0675.tml")) {
            text = text.replace("Markets</ENAMEX>", "Markets</ENAMEX> --");
            text = text.replace("19.29</CARDINAL>.", "</CARDINAL>");
        } else if (name.equals("wsj_0781.tml")) {
            text = text.replace("not definitely", "definitely not");
        } else if (name.equals("wsj_1003.tml")) {
            text = text.replace("a shhha55 cents a share,   ents a share, but  ssa share", "a share");
            text = text.replace("steel business, <EVENT eid=\"e109\"", "Armco, hampered by lower volume in its specialty steel business, <EVENT eid=\"e109\"");
        }
        return text;
    }
}

