/*
 * Decompiled with CFR 0.152.
 */
package epic.corpora;

import epic.corpora.MascFile;
import epic.corpora.MascFile$;
import epic.corpora.MascSentence;
import epic.corpora.MascTransform;
import java.io.File;
import java.io.FileWriter;
import java.io.Serializable;
import scala.Function1;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.GenTraversableOnce;
import scala.collection.IndexedSeqOptimized;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.immutable.Range;
import scala.collection.immutable.StringOps;
import scala.collection.immutable.StringOps$;
import scala.collection.mutable.ArrayOps;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichChar$;
import scala.runtime.RichInt$;

public final class MascTransform$ {
    /**
     * Singleton instance of this module. It is assigned by the private
     * constructor, which the static initializer below invokes when the class
     * is initialized.
     */
    public static MascTransform$ MODULE$;

    static {
        // The constructor stores the new instance in MODULE$, so the result is not kept here.
        new MascTransform$();
    }

    /**
     * Command-line entry point. Collects every target below the MASC directory,
     * splits the targets by position into three sets and writes each set out
     * through {@code processSet}.
     *
     * <p>The split is decided by the target's index modulo 5: a remainder below 3
     * goes to "train", remainder 3 to "dev" and remainder 4 to "test".
     *
     * @param args {@code args[0]} is the MASC directory to scan (required);
     *             {@code args[1]} is the output directory (optional, defaults to {@code /tmp})
     * @throws IllegalArgumentException if the MASC directory argument is missing
     */
    public void main(String[] args) {
        // Fail with a usage message instead of a bare ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("usage: epic.corpora.MascTransform <mascDir> [outputDir]");
        }
        String mascDir = args[0];
        File outputDir = new File(args.length > 1 ? args[1] : "/tmp");
        outputDir.mkdirs();
        // Pair every target with its position so the predicates below can bucket by index.
        Seq targetsAndIndices = (Seq)this.collectTargets(new File(mascDir)).zipWithIndex(Seq$.MODULE$.canBuildFrom());
        // Each filter keeps one bucket; unzip(...)._1() drops the index again.
        Seq trainSet = (Seq)((GenericTraversableTemplate)targetsAndIndices.filter((Function1 & Serializable & scala.Serializable)x$1 -> BoxesRunTime.boxToBoolean((boolean)MascTransform$.$anonfun$main$1(x$1)))).unzip((Function1)Predef$.MODULE$.$conforms())._1();
        Seq devSet = (Seq)((GenericTraversableTemplate)targetsAndIndices.filter((Function1 & Serializable & scala.Serializable)x$2 -> BoxesRunTime.boxToBoolean((boolean)MascTransform$.$anonfun$main$2(x$2)))).unzip((Function1)Predef$.MODULE$.$conforms())._1();
        Seq testSet = (Seq)((GenericTraversableTemplate)targetsAndIndices.filter((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToBoolean((boolean)MascTransform$.$anonfun$main$3(x$3)))).unzip((Function1)Predef$.MODULE$.$conforms())._1();
        this.processSet(outputDir, "train", (Seq<Tuple2<File, String>>)trainSet);
        this.processSet(outputDir, "dev", (Seq<Tuple2<File, String>>)devSet);
        this.processSet(outputDir, "test", (Seq<Tuple2<File, String>>)testSet);
    }

    /**
     * Recursively gathers the targets found below a directory.
     *
     * <p>Every entry of {@code dir2} whose name ends in {@code .txt} yields a pair of
     * {@code dir2} and the file name with its last four characters removed. The
     * pairs produced by recursing into each sub-directory are appended after them.
     *
     * @param dir2 directory to scan
     * @return the pairs for this directory followed by those of its sub-directories
     */
    public Seq<Tuple2<File, String>> collectTargets(File dir2) {
        // NOTE(review): File.listFiles() returns null when dir2 is not a directory or
        // cannot be read; that case is not handled here — confirm callers guarantee a directory.
        Seq entries = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])dir2.listFiles())).toSeq();

        // Targets that live directly in this directory.
        TraversableLike textFiles = (TraversableLike)entries.filter((Function1 & Serializable & scala.Serializable)entry -> BoxesRunTime.boxToBoolean((boolean)MascTransform$.$anonfun$collectTargets$1(entry)));
        Seq localTargets = (Seq)textFiles.map((Function1 & Serializable & scala.Serializable)textFile -> {
            String fileName = textFile.getName();
            // Decompiler-emitted guard on the Scala Predef module.
            if (Predef$.MODULE$ == null) {
                throw null;
            }
            // Dropping four characters strips the ".txt" suffix selected by the filter above.
            return new Tuple2((Object)dir2, new StringOps(fileName).dropRight(4));
        }, Seq$.MODULE$.canBuildFrom());

        // Targets contributed by each sub-directory, gathered recursively.
        TraversableLike subDirectories = (TraversableLike)entries.filter((Function1 & Serializable & scala.Serializable)entry -> BoxesRunTime.boxToBoolean((boolean)entry.isDirectory()));
        GenTraversableOnce nestedTargets = (GenTraversableOnce)subDirectories.flatMap((Function1 & Serializable & scala.Serializable)subDir -> this.collectTargets((File)subDir), Seq$.MODULE$.canBuildFrom());

        return (Seq)localTargets.$plus$plus(nestedTargets, Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Writes one data set into {@code <parentDir>/<outputName>/}.
     *
     * <p>Three files are created there: {@code <outputName>-sent.txt},
     * {@code <outputName>-tok.txt} and {@code <outputName>-ner.txt}. Every
     * {@code MascFile} built from {@code targets} is passed, together with the
     * three writers, to the per-file handler that fills them.
     *
     * @param parentDir  directory under which the set's own directory is created
     * @param outputName set name; used as directory name and as file-name prefix
     * @param targets    (directory, file prefix) pairs handed to {@code MascFile$.apply}
     */
    public void processSet(File parentDir, String outputName, Seq<Tuple2<File, String>> targets) {
        System.err.println("Creating " + outputName);
        File outputDir = new File(parentDir, outputName);
        outputDir.mkdirs();
        // try-with-resources closes (and thereby flushes) every writer even when
        // opening a later writer or processing a file throws, so nothing leaks.
        // NOTE(review): FileWriter encodes with the platform default charset — confirm that is intended.
        try (FileWriter outputSentences = new FileWriter(new File(outputDir, outputName + "-sent.txt"));
             FileWriter outputTokens = new FileWriter(new File(outputDir, outputName + "-tok.txt"));
             FileWriter outputNer = new FileWriter(new File(outputDir, outputName + "-ner.txt"))) {
            MascFile$.MODULE$.apply(targets).foreach((Function1 & Serializable & scala.Serializable)mfile -> {
                MascTransform$.$anonfun$processSet$1(outputSentences, outputTokens, outputNer, mfile);
                return BoxedUnit.UNIT;
            });
        }
        System.err.println();
    }

    /**
     * Train-set predicate used by {@code main}: accepts a pair whose integer
     * second component, taken modulo 5, is below 3.
     */
    public static final /* synthetic */ boolean $anonfun$main$1(Tuple2 x$1) {
        int bucket = x$1._2$mcI$sp() % 5;
        return bucket < 3;
    }

    /**
     * Dev-set predicate used by {@code main}: accepts a pair whose integer
     * second component, taken modulo 5, equals 3.
     */
    public static final /* synthetic */ boolean $anonfun$main$2(Tuple2 x$2) {
        int bucket = x$2._2$mcI$sp() % 5;
        return bucket == 3;
    }

    /**
     * Test-set predicate used by {@code main}: accepts a pair whose integer
     * second component, taken modulo 5, equals 4.
     */
    public static final /* synthetic */ boolean $anonfun$main$3(Tuple2 x$3) {
        int bucket = x$3._2$mcI$sp() % 5;
        return bucket == 4;
    }

    /**
     * Tells whether a file's name ends with the (case-sensitive) suffix {@code .txt}.
     * Only the last path component is inspected.
     */
    public static final /* synthetic */ boolean $anonfun$collectTargets$1(File x$4) {
        String fileName = x$4.getName();
        return fileName.endsWith(".txt");
    }

    /**
     * Tells whether a character is a Unicode space character.
     *
     * <p>This is {@link Character#isSpaceChar(char)} semantics (space, line and
     * paragraph separators), so control characters such as tab or newline do
     * <em>not</em> count. The JDK method is called directly; the former unreachable
     * null guard and the detour through the Scala runtime wrapper are gone.
     *
     * @param x$8 character to test
     * @return {@code true} if the character is a Unicode space character
     */
    public static final /* synthetic */ boolean $anonfun$processSet$4(char x$8) {
        return Character.isSpaceChar(x$8);
    }

    /**
     * Emits the output for the token at position {@code i} of a sentence.
     *
     * <p>Appends the token plus a separator to {@code tokenizedSentence$1} and writes
     * one {@code "token pos pos ner"} line to {@code outputNer$1}. The separator is
     * {@code <SPLIT>} when this token is not the last one and its region ends exactly
     * where the next token's region starts, otherwise a single space. A token that
     * contains a space character is reported on standard output.
     */
    public static final /* synthetic */ void $anonfun$processSet$3(FileWriter outputNer$1, StringBuffer tokenizedSentence$1, Seq tokens$1, Seq postags$1, Seq nerLabels$1, Seq regions$1, MascFile mfile$1, MascSentence sentence$1, int i) {
        // Look everything up first (region, NER label, POS tag, token), then narrow the types.
        Object rawRegion = regions$1.apply(i);
        Object rawNer = nerLabels$1.apply(i);
        Object rawPos = postags$1.apply(i);
        String token = (String)tokens$1.apply(i);
        String posTag = (String)rawPos;
        String nerTag = (String)rawNer;
        MascTransform.MRegion span = (MascTransform.MRegion)rawRegion;

        // Decompiler-emitted guard on the Scala Predef module.
        if (Predef$.MODULE$ == null) {
            throw null;
        }

        boolean containsSpace = IndexedSeqOptimized.exists$((IndexedSeqOptimized)new StringOps(token), (Function1 & Serializable & scala.Serializable)ch -> BoxesRunTime.boxToBoolean((boolean)MascTransform$.$anonfun$processSet$4(BoxesRunTime.unboxToChar((Object)ch))));
        if (containsSpace) {
            Predef$.MODULE$.println((Object)("Weird token! '" + token + "' " + mfile$1.dir() + "/" + mfile$1.prefix() + ".txt:" + span.start() + "-" + span.end()));
        }

        // Tokens whose regions touch (no gap in between) are joined with the <SPLIT> marker.
        String separator;
        boolean hasSuccessor = i < sentence$1.numTokens() - 1;
        if (hasSuccessor && span.end() == ((MascTransform.MRegion)regions$1.apply(i + 1)).start()) {
            separator = "<SPLIT>";
        } else {
            separator = " ";
        }
        tokenizedSentence$1.append(token).append(separator);

        // NOTE(review): the POS tag is written twice; presumably the second column is a
        // placeholder required by the consumer's column layout — confirm.
        outputNer$1.write(token + " " + posTag + " " + posTag + " " + nerTag + "\n");
    }

    /**
     * Writes one sentence to the three output writers.
     *
     * <p>Each token is handed to the per-token handler, which writes a line to
     * {@code outputNer$1} and accumulates the tokenized form; an empty line then
     * terminates the sentence in the NER output. The sentence text is cut from the
     * file's raw text between the start of the first region and the end of the last
     * one, with newlines turned into spaces, and written to {@code outputSentences$1}.
     * The trimmed tokenized form goes to {@code outputTokens$1}.
     *
     * @throws MatchError if {@code sentence} is {@code null}
     */
    public static final /* synthetic */ void $anonfun$processSet$2(FileWriter outputSentences$1, FileWriter outputTokens$1, FileWriter outputNer$1, MascFile mfile$1, MascSentence sentence) {
        StringBuffer tokenLine = new StringBuffer();
        if (sentence == null) {
            throw new MatchError((Object)sentence);
        }
        // Accessor order (regions, labels, POS tags, tokens) is kept as in the original.
        Seq<MascTransform.MRegion> regions = sentence.orderedRegions();
        Seq<String> nerLabels = sentence.bioLabels();
        Seq<String> postags = sentence.orderedPos();
        Seq<String> tokens = sentence.orderedTokens();

        // Decompiler-emitted guard on the Scala Predef module.
        if (Predef$.MODULE$ == null) {
            throw null;
        }

        // Visit token indices 0 .. numTokens-1; the count is read once, up front.
        int tokenCount = sentence.numTokens();
        for (int index = 0; index < tokenCount; index++) {
            MascTransform$.$anonfun$processSet$3(outputNer$1, tokenLine, tokens, postags, nerLabels, regions, mfile$1, sentence, index);
        }
        // A blank line separates sentences in the NER output.
        outputNer$1.write("\n");

        int firstOffset = ((MascTransform.MRegion)sentence.orderedRegions().head()).start();
        int lastOffset = ((MascTransform.MRegion)sentence.orderedRegions().last()).end();
        String rawText = mfile$1.rawtext();
        if (Predef$.MODULE$ == null) {
            throw null;
        }
        String slice = StringOps$.MODULE$.slice$extension(rawText, firstOffset, lastOffset);
        // Keep every sentence on a single output line.
        String sentenceText = slice.replace('\n', ' ');
        outputSentences$1.write(sentenceText + "\n");
        outputTokens$1.write(tokenLine.toString().trim() + "\n");
    }

    /**
     * Processes one MASC file: every sentence of {@code mfile} is passed, with the
     * three writers and the file itself, to the per-sentence handler.
     */
    public static final /* synthetic */ void $anonfun$processSet$1(FileWriter outputSentences$1, FileWriter outputTokens$1, FileWriter outputNer$1, MascFile mfile) {
        Function1 writeSentence = (Function1 & Serializable & scala.Serializable)each -> {
            MascTransform$.$anonfun$processSet$2(outputSentences$1, outputTokens$1, outputNer$1, mfile, (MascSentence)each);
            return BoxedUnit.UNIT;
        };
        mfile.sentences().foreach(writeSentence);
    }

    /**
     * Private constructor; publishes the newly created instance through the
     * static {@code MODULE$} field.
     */
    private MascTransform$() {
        MODULE$ = this;
    }
}

