/*
 * Decompiled with CFR 0.152.
 */
package org.dbpedia.extraction.scripts;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.nio.channels.FileChannel;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.dbpedia.extraction.util.FileLike;
import org.dbpedia.extraction.util.IOUtils$;
import org.dbpedia.extraction.util.RichFile$;
import org.dbpedia.extraction.util.StringUtils$;
import scala.Console$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.Predef;
import scala.Predef$;
import scala.Serializable;
import scala.Tuple2;
import scala.collection.IterableLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.SortedSet;
import scala.collection.SortedSet$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.LongRef;
import scala.runtime.ObjectRef;

public final class WikipediaDumpSplitter$ {
    /** Singleton instance; assigned by the private constructor when the static initializer below runs. */
    public static final WikipediaDumpSplitter$ MODULE$;
    /** Maps a file-name suffix ("gz", "bz2") to a function that wraps an output stream in the matching compressor. */
    private final Map<String, Function1<OutputStream, OutputStream>> zippers;

    // Instantiating the class is enough: the constructor stores the new instance in MODULE$.
    static {
        new WikipediaDumpSplitter$();
    }

    /*
     * Loose catch block
     * WARNING - void declaration
     */
    /**
     * Splits a dump file into chunk files written to an output directory, using a streams index
     * to find the byte offsets at which the dump may be cut.
     *
     * <p>Arguments, as read below:
     * <ul>
     *   <li>{@code args[0]} - dump file (the usage message says it must be in the multistream format)</li>
     *   <li>{@code args[1]} - streams index file; the text before the first ':' of each line is parsed as a byte offset</li>
     *   <li>{@code args[2]} - existing, writable output directory</li>
     *   <li>{@code args[3]} - approximate chunk size in MB</li>
     *   <li>{@code args[4]} - optional "replace" flag parsed with {@code Boolean.parseBoolean};
     *       treated as {@code true} when reading it throws (e.g. the argument is absent)</li>
     * </ul>
     *
     * <p>Output files are named {@code <prefix>.<part>.<extension>} where prefix/extension are the dump
     * file name split at its last '.', and part is "header", "footer" or a zero-padded chunk number.
     *
     * <p>NOTE(review): this method is decompiler output. The "Loose catch block" warning above it and the
     * orphan {@code catch} clauses at its end mean the exception-handling structure shown here is not
     * the structure of the compiled code.
     *
     * @param args command-line arguments as described above
     */
    public void main(String[] args) {
        boolean bl;
        // NOTE(review): the check requires at least four arguments, but the message below says "three args".
        Predef$.MODULE$.require(args != null && args.length >= 4, (Function0)new Serializable(){

            public final String apply() {
                return "need three args: wikipedia dump file (must be in the multistream format), streams index file, output directory, approx. chunk size (in MB)";
            }
        });
        File dump = new File(args[0]);
        Predef$.MODULE$.require(dump.isFile() && dump.canRead(), (Function0)new Serializable(){

            public final String apply() {
                return "Please specify a valid dump file!";
            }
        });
        File index = new File(args[1]);
        Predef$.MODULE$.require(index.isFile() && index.canRead(), (Function0)new Serializable(){

            public final String apply() {
                return "Please specify a valid streams index!";
            }
        });
        File output = new File(args[2]);
        Predef$.MODULE$.require(output.isDirectory() && output.canWrite(), (Function0)new Serializable(){

            public final String apply() {
                return "Please specify a valid output directory";
            }
        });
        // MB -> bytes. NOTE(review): this is int arithmetic, so values of 2048 MB and above overflow;
        // the require below only rejects results that end up <= 0.
        int chunkSize = new StringOps(Predef$.MODULE$.augmentString(args[3])).toInt() * 1024 * 1024;
        Predef$.MODULE$.require(chunkSize > 0, (Function0)new Serializable(){

            public final String apply() {
                return "Please specify a positive integer";
            }
        });
        // Optional fifth argument: any exception while reading it (such as a missing args[4]) selects true.
        try {
            bl = Boolean.parseBoolean(args[4]);
        }
        catch (Exception exception) {
            bl = true;
        }
        boolean replace = bl;
        // Split the dump file name at its last '.' (char code 46) into prefix and extension.
        int i = RichFile$.MODULE$.wrapFile(dump).name().lastIndexOf(46);
        String chunkNamePrefix = RichFile$.MODULE$.wrapFile(dump).name().substring(0, i);
        String chunkNameExtension = RichFile$.MODULE$.wrapFile(dump).name().substring(i + 1);
        IntRef chunkNumber = IntRef.create((int)0);
        ObjectRef chunk = ObjectRef.create(null);
        IntRef lines = IntRef.create((int)0);
        long start = System.nanoTime();
        // Offsets are collected in a sorted set of longs, then turned into a Seq.
        ObjectRef offsets = ObjectRef.create((Object)SortedSet$.MODULE$.apply((Seq)Nil$.MODULE$, (Ordering)Ordering.Long$.MODULE$));
        Seq offsetsSeq = (Seq)Seq$.MODULE$.apply((Seq)Nil$.MODULE$);
        // Cache of the parsed offsets: "<index file name>.obj" in the index file's parent directory.
        // NOTE(review): getParentFile() returns null for a path without a parent component - confirm callers pass one.
        File indexCache = new File(index.getParentFile().getAbsolutePath(), new StringBuilder().append((Object)RichFile$.MODULE$.wrapFile(index).name()).append((Object)".obj").toString());
        if (indexCache.exists()) {
            // Cache hit: the offsets are read back with Java serialization instead of re-parsing the index.
            ObjectInputStream inputStream = new ObjectInputStream(new FileInputStream(indexCache));
            offsetsSeq = (Seq)inputStream.readObject();
            inputStream.close();
        } else {
            // Cache miss: parse every index line, keeping the number before the first ':' (char code 58).
            IOUtils$.MODULE$.readLines((FileLike)RichFile$.MODULE$.wrapFile(index), IOUtils$.MODULE$.readLines$default$2(), (Function1)new Serializable(lines, start, offsets){
                private final IntRef lines$1;
                private final long start$1;
                private final ObjectRef offsets$1;

                public final void apply(String line) {
                    if (line != null) {
                        ++this.lines$1.elem;
                        this.offsets$1.elem = (SortedSet)((SortedSet)this.offsets$1.elem).$plus((Object)BoxesRunTime.boxToLong((long)new StringOps(Predef$.MODULE$.augmentString(line.substring(0, line.indexOf(58)))).toLong()));
                        // Progress report every 10000 lines.
                        if (this.lines$1.elem % 10000 == 0) {
                            WikipediaDumpSplitter$.MODULE$.org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$log(this.lines$1.elem, ((SortedSet)this.offsets$1.elem).size(), this.start$1);
                        }
                    }
                }
                {
                    this.lines$1 = lines$1;
                    this.start$1 = start$1;
                    this.offsets$1 = offsets$1;
                }
            });
            offsetsSeq = ((SortedSet)offsets.elem).toSeq();
            // Persist the parsed offsets so the next run takes the cache branch above.
            ObjectOutputStream outputStream = new ObjectOutputStream(new FileOutputStream(indexCache));
            outputStream.writeObject(offsetsSeq);
            outputStream.close();
        }
        // Header chunk: the bytes of the dump from position 0 up to the first offset, with no header/footer framing.
        File header = this.org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$startChunk(output, chunkNamePrefix, "header", chunkNameExtension);
        this.org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$copyToChunk(dump, header, 0L, BoxesRunTime.unboxToLong((Object)offsetsSeq.apply(0)), this.copyToChunk$default$5(), this.copyToChunk$default$6());
        // Footer chunk: the closing tag, written through the stream wrapper chosen from the dump file name's suffix.
        OutputStream footerStream = null;
        File footer = this.org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$startChunk(output, chunkNamePrefix, "footer", chunkNameExtension);
        try {
            footerStream = (OutputStream)this.zipper(RichFile$.MODULE$.wrapFile(dump).name()).apply((Object)new FileOutputStream(footer));
            footerStream.write("</mediawiki>\n".getBytes());
        }
        catch (Throwable throwable) {
            // NOTE(review): var23_20 is never assigned in this text (decompiler "void declaration" artifact);
            // presumably the compiled code closes footerStream here - confirm against the bytecode.
            void var23_20;
            var23_20.close();
            throw throwable;
        }
        footerStream.close();
        // Walk the offsets after the first one; each time the distance from `low` reaches chunkSize,
        // record the pair (low, offset) as a chunk boundary and continue from that offset.
        LongRef low = LongRef.create((long)BoxesRunTime.unboxToLong((Object)offsetsSeq.apply(0)));
        ArrayBuffer boundaries = new ArrayBuffer();
        ((IterableLike)offsetsSeq.drop(1)).foreach((Function1)new Serializable(chunkSize, low, boundaries){
            private final int chunkSize$1;
            private final LongRef low$1;
            private final ArrayBuffer boundaries$1;

            public final void apply(long offset) {
                this.apply$mcVJ$sp(offset);
            }

            public void apply$mcVJ$sp(long offset) {
                if (offset - this.low$1.elem >= (long)this.chunkSize$1) {
                    this.boundaries$1.append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{new Tuple2.mcJJ.sp(this.low$1.elem, offset)}));
                    this.low$1.elem = offset;
                }
            }
            {
                this.chunkSize$1 = chunkSize$1;
                this.low$1 = low$1;
                this.boundaries$1 = boundaries$1;
            }
        });
        // Whatever remains between the last recorded boundary and the end of the dump becomes the final chunk.
        if (dump.length() - low.elem > 0L) {
            boundaries.append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{new Tuple2.mcJJ.sp(low.elem, dump.length())}));
        }
        // Width used to zero-pad chunk numbers: the number of decimal digits of (boundary count + 1).
        int digits = ((Object)BoxesRunTime.boxToInteger((int)(boundaries.size() + 1))).toString().length();
        // The withFilter below only rejects null tuples (compiler-generated refutability check).
        ((TraversableLike)boundaries.zipWithIndex(ArrayBuffer$.MODULE$.canBuildFrom())).withFilter((Function1)new Serializable(){

            public final boolean apply(Tuple2<Tuple2<Object, Object>, Object> check$ifrefutable$1) {
                Tuple2 tuple2;
                Tuple2<Tuple2<Object, Object>, Object> tuple22 = check$ifrefutable$1;
                boolean bl = tuple22 != null && (tuple2 = (Tuple2)tuple22._1()) != null;
                return bl;
            }
        }).foreach((Function1)new Serializable(dump, output, replace, chunkNamePrefix, chunkNameExtension, chunkNumber, chunk, header, footer, boundaries, digits){
            private final File dump$1;
            private final File output$1;
            private final boolean replace$1;
            private final String chunkNamePrefix$1;
            private final String chunkNameExtension$1;
            private final IntRef chunkNumber$1;
            private final ObjectRef chunk$1;
            private final File header$1;
            private final File footer$1;
            private final ArrayBuffer boundaries$1;
            private final int digits$1;

            public final Object apply(Tuple2<Tuple2<Object, Object>, Object> x$1) {
                Tuple2<Tuple2<Object, Object>, Object> tuple2 = x$1;
                if (tuple2 != null) {
                    Tuple2 tuple22 = (Tuple2)tuple2._1();
                    int index = tuple2._2$mcI$sp();
                    if (tuple22 != null) {
                        long low = tuple22._1$mcJ$sp();
                        long high = tuple22._2$mcJ$sp();
                        // Chunk numbers start at 1; the chunk file is named with the zero-padded number.
                        ++this.chunkNumber$1.elem;
                        this.chunk$1.elem = WikipediaDumpSplitter$.MODULE$.org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$startChunk(this.output$1, this.chunkNamePrefix$1, WikipediaDumpSplitter$.MODULE$.org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$chunkId(this.chunkNumber$1.elem, this.digits$1), this.chunkNameExtension$1);
                        // Copies [low, high) framed by the header; the footer is appended to every chunk except the last boundary.
                        // NOTE(review): when replace is false nothing is copied at all - confirm that is intended.
                        BoxedUnit boxedUnit = this.replace$1 ? BoxesRunTime.boxToLong((long)WikipediaDumpSplitter$.MODULE$.org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$copyToChunk(this.dump$1, (File)this.chunk$1.elem, low, high - low, this.header$1, index != this.boundaries$1.size() - 1 ? this.footer$1 : null)) : BoxedUnit.UNIT;
                        return boxedUnit;
                    }
                }
                throw new MatchError(tuple2);
            }
            {
                this.dump$1 = dump$1;
                this.output$1 = output$1;
                this.replace$1 = replace$1;
                this.chunkNamePrefix$1 = chunkNamePrefix$1;
                this.chunkNameExtension$1 = chunkNameExtension$1;
                this.chunkNumber$1 = chunkNumber$1;
                this.chunk$1 = chunk$1;
                this.header$1 = header$1;
                this.footer$1 = footer$1;
                this.boundaries$1 = boundaries$1;
                this.digits$1 = digits$1;
            }
        });
        return;
        // NOTE(review): the two catch clauses below are the "loose catch blocks" the decompiler could not
        // attach to a try; presumably they belong to close-on-failure handling of the index cache
        // streams above - confirm against the bytecode.
        catch (Throwable throwable) {
            void var18_17;
            var18_17.close();
            throw throwable;
        }
        catch (Throwable throwable) {
            void var20_18;
            var20_18.close();
            throw throwable;
        }
    }

    /**
     * Renders a chunk number as a decimal string, left-padded with '0' characters to a minimum width.
     *
     * <p>The decimal text of {@code current} is never truncated: when it is already at least
     * {@code digits} characters long (or {@code digits} is zero or negative) it is returned unchanged.
     * Padding is applied to the text as-is, so a minus sign is treated like any other character.
     *
     * @param current the chunk number to render
     * @param digits the minimum length of the result
     * @return the zero-padded decimal representation of {@code current}
     */
    public String org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$chunkId(int current, int digits) {
        String number = Integer.toString(current);
        // Fully qualified on purpose: this file imports scala.collection.mutable.StringBuilder,
        // which shadows the simple name StringBuilder.
        java.lang.StringBuilder padded = new java.lang.StringBuilder(Math.max(digits, number.length()));
        for (int length = number.length(); length < digits; ++length) {
            padded.append('0');
        }
        return padded.append(number).toString();
    }

    /**
     * Builds the {@link File} handle for one output part; nothing is created on disk here.
     *
     * <p>The file name is {@code prefix + "." + chunk + "." + extension}, resolved against {@code dir}.
     *
     * @param dir directory the part belongs to
     * @param prefix leading part of the file name
     * @param chunk middle part of the file name identifying the part
     * @param extension trailing part of the file name
     * @return a file handle for {@code dir/prefix.chunk.extension}
     */
    public File org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$startChunk(File dir, String prefix, String chunk, String extension) {
        final char separator = '.';
        String fileName = prefix + separator + chunk + separator + extension;
        return new File(dir, fileName);
    }

    /*
     * WARNING - void declaration
     */
    public long org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$copyToChunk(File from, File to, long offset, long chunkSize, File header, File footer) {
        long l;
        FileChannel toChannel;
        FileChannel fromChannel;
        block3: {
            Console$.MODULE$.err().println(new StringBuilder().append((Object)"Generating chunk ").append((Object)RichFile$.MODULE$.wrapFile(to).name()).append((Object)" from file ").append((Object)RichFile$.MODULE$.wrapFile(from).name()).toString());
            fromChannel = new FileInputStream(from).getChannel();
            toChannel = new FileOutputStream(to).getChannel();
            FileChannel headerChannel = header == null ? null : new FileInputStream(header).getChannel();
            FileChannel footerChannel = footer == null ? null : new FileInputStream(footer).getChannel();
            try {
                toChannel.position(0L);
                long headerTransferred = header == null ? 0L : headerChannel.transferTo(0L, headerChannel.size(), toChannel);
                long transferred = fromChannel.transferTo(offset, chunkSize, toChannel);
                long footerTransferred = footer == null ? 0L : footerChannel.transferTo(0L, footerChannel.size(), toChannel);
                l = headerTransferred + transferred + footerTransferred;
                if (headerChannel == null) break block3;
            }
            catch (Throwable throwable) {
                void var10_8;
                void var9_7;
                void var11_9;
                if (var11_9 != null) {
                    var11_9.close();
                }
                var9_7.close();
                var10_8.close();
                Console$.MODULE$.err().println(new StringBuilder().append((Object)"Done generating chunk ").append((Object)RichFile$.MODULE$.wrapFile(to).name()).append((Object)" from file ").append((Object)RichFile$.MODULE$.wrapFile(from).name()).toString());
                throw throwable;
            }
            headerChannel.close();
        }
        fromChannel.close();
        toChannel.close();
        Console$.MODULE$.err().println(new StringBuilder().append((Object)"Done generating chunk ").append((Object)RichFile$.MODULE$.wrapFile(to).name()).append((Object)" from file ").append((Object)RichFile$.MODULE$.wrapFile(from).name()).toString());
        return l;
    }

    /**
     * Value passed as the fifth argument ({@code header}) of copyToChunk when main copies the
     * header chunk: {@code null}, i.e. no header file.
     */
    private File copyToChunk$default$5() {
        return null;
    }

    /**
     * Value passed as the sixth argument ({@code footer}) of copyToChunk when main copies the
     * header chunk: {@code null}, i.e. no footer file.
     */
    private File copyToChunk$default$6() {
        return null;
    }

    /**
     * Prints one progress line to the error console: lines processed, offsets collected, the time
     * elapsed since {@code start} and the average number of nanoseconds spent per line.
     *
     * @param lines number of index lines processed so far
     * @param collected number of offsets collected so far
     * @param start {@link System#nanoTime()} reading taken when processing began
     */
    public void org$dbpedia$extraction$scripts$WikipediaDumpSplitter$$log(int lines, int collected, long start) {
        long elapsedNanos = System.nanoTime() - start;
        // Both operands are narrowed to float before dividing, exactly as in the average reported so far.
        float nanosPerLine = (float)elapsedNanos / (float)lines;
        String message = "processed " + lines
            + " lines, collected " + collected
            + " offsets - " + StringUtils$.MODULE$.prettyMillis(elapsedNanos / 1000000L)
            + " (" + nanosPerLine + " nanos per line)";
        Console$.MODULE$.err().println(message);
    }

    /** Returns the suffix-to-stream-wrapper map built in the constructor. */
    private Map<String, Function1<OutputStream, OutputStream>> zippers() {
        return this.zippers;
    }

    /**
     * Selects the output-stream wrapper for a file name.
     *
     * <p>The part of {@code name} after its last '.' is looked up in the zippers map. When no entry
     * exists for that suffix, the returned function hands back the stream it is given unchanged.
     *
     * @param name file name whose suffix selects the wrapper
     * @return a function that wraps (or passes through) an output stream
     */
    private Function1<OutputStream, OutputStream> zipper(String name) {
        return (Function1)this.zippers().getOrElse((Object)this.suffix(name), (Function0)new Serializable(){

            // Fallback used when the suffix has no registered wrapper.
            public final Function1<OutputStream, OutputStream> apply() {
                return new Serializable(this){

                    // Pass-through: the stream is returned as given.
                    public final OutputStream apply(OutputStream x) {
                        return (OutputStream)Predef$.MODULE$.identity((Object)x);
                    }
                };
            }
        });
    }

    /**
     * Returns the part of {@code name} that follows its last '.'.
     *
     * @param name file name to inspect
     * @return the text after the last dot, or all of {@code name} when it contains no dot
     */
    private String suffix(String name) {
        final int lastDot = name.lastIndexOf('.');
        // lastDot is -1 when there is no dot, so the substring then starts at 0 and covers the whole name.
        return name.substring(lastDot + 1);
    }

    /**
     * Creates the singleton: publishes the new instance through {@code MODULE$} and builds the
     * zippers map with two entries, "gz" (wraps a stream in a {@link GZIPOutputStream}) and
     * "bz2" (wraps a stream in a {@link BZip2CompressorOutputStream}).
     */
    private WikipediaDumpSplitter$() {
        MODULE$ = this;
        this.zippers = (Map)Predef$.MODULE$.Map().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"gz"), (Object)new Serializable(){

            // "gz" -> gzip compression
            public final GZIPOutputStream apply(OutputStream x$2) {
                return new GZIPOutputStream(x$2);
            }
        }), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"bz2"), (Object)new Serializable(){

            // "bz2" -> bzip2 compression
            public final BZip2CompressorOutputStream apply(OutputStream x$3) {
                return new BZip2CompressorOutputStream(x$3);
            }
        })}));
    }
}

