/*
 * Decompiled with CFR 0.152.
 */
package chalk.corpora;

import chalk.tools.cmdline.CmdLineUtil;
import chalk.tools.namefind.NameSampleDataStream;
import chalk.tools.sentdetect.SentenceDetectorEvaluationMonitor;
import chalk.tools.sentdetect.SentenceDetectorEvaluator;
import chalk.tools.sentdetect.SentenceDetectorFactory;
import chalk.tools.sentdetect.SentenceDetectorME;
import chalk.tools.sentdetect.SentenceModel;
import chalk.tools.sentdetect.SentenceSample;
import chalk.tools.sentdetect.SentenceSampleStream;
import chalk.tools.tokenize.TokenSampleStream;
import chalk.tools.tokenize.TokenizerEvaluationMonitor;
import chalk.tools.tokenize.TokenizerEvaluator;
import chalk.tools.tokenize.TokenizerFactory;
import chalk.tools.tokenize.TokenizerME;
import chalk.tools.tokenize.TokenizerModel;
import chalk.tools.util.ObjectStream;
import chalk.tools.util.PlainTextByLineStream;
import chalk.tools.util.TrainingParameters;
import java.io.File;
import java.io.FileInputStream;
import scala.Array$;
import scala.Predef$;
import scala.collection.Seq;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;

public final class MascEval$ {
    public static final MascEval$ MODULE$;

    static {
        new MascEval$();
    }

    public void main(String[] args) {
        File mascDir = new File(args[0]);
        TrainingParameters mlParams = TrainingParameters.defaultParams();
        char[] eos = (char[])Array$.MODULE$.apply((Seq)Nil$.MODULE$, ClassTag$.MODULE$.Char());
        SentenceSampleStream sentenceTraining = this.sentenceSampleStream(new File(mascDir, "train/train-sent.txt"));
        SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(null, "en", true, null, eos);
        SentenceModel sentModel = SentenceDetectorME.train("en", (ObjectStream<SentenceSample>)sentenceTraining, sdFactory, mlParams);
        sentenceTraining.close();
        SentenceDetectorEvaluator sentenceEvaluator = new SentenceDetectorEvaluator(new SentenceDetectorME(sentModel), new SentenceDetectorEvaluationMonitor[0]);
        SentenceSampleStream sentenceDev = this.sentenceSampleStream(new File(mascDir, "dev/dev-sent.txt"));
        sentenceEvaluator.evaluate(sentenceDev);
        sentenceDev.close();
        TokenSampleStream tokTraining = this.tokenSampleStream(new File(mascDir, "train/train-tok.txt"));
        TokenizerFactory tokFactory = TokenizerFactory.create(null, "en", null, false, null);
        TokenizerModel tokModel = TokenizerME.train(tokTraining, tokFactory, mlParams);
        tokTraining.close();
        TokenizerEvaluator tokEvaluator = new TokenizerEvaluator(new TokenizerME(tokModel), new TokenizerEvaluationMonitor[0]);
        TokenSampleStream tokDev = this.tokenSampleStream(new File(mascDir, "dev/dev-tok.txt"));
        tokEvaluator.evaluate(tokDev);
        tokDev.close();
        Predef$.MODULE$.println((Object)"Sentence detection");
        Predef$.MODULE$.println((Object)sentenceEvaluator.getFMeasure());
        Predef$.MODULE$.println();
        Predef$.MODULE$.println((Object)"Tokenization");
        Predef$.MODULE$.println((Object)tokEvaluator.getFMeasure());
        Predef$.MODULE$.println();
    }

    private SentenceSampleStream sentenceSampleStream(File file) {
        FileInputStream sampleDataIn = CmdLineUtil.openInFile(file);
        PlainTextByLineStream lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(), "UTF-8");
        return new SentenceSampleStream(lineStream);
    }

    private TokenSampleStream tokenSampleStream(File file) {
        FileInputStream sampleDataIn = CmdLineUtil.openInFile(file);
        PlainTextByLineStream lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(), "UTF-8");
        return new TokenSampleStream(lineStream);
    }

    private NameSampleDataStream nerSampleStream(File file) {
        FileInputStream sampleDataIn = CmdLineUtil.openInFile(file);
        PlainTextByLineStream lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(), "UTF-8");
        return new NameSampleDataStream(lineStream);
    }

    private MascEval$() {
        MODULE$ = this;
    }
}

