/*
 * Decompiled with CFR 0.152.
 */
package org.dbpedia.extraction.scripts;

import java.io.File;
import java.io.Writer;
import java.nio.charset.Charset;
import org.dbpedia.extraction.scripts.DecodeHtmlEntities$;
import org.dbpedia.extraction.util.FileLike;
import org.dbpedia.extraction.util.IOUtils$;
import org.dbpedia.extraction.util.RichFile$;
import org.dbpedia.extraction.util.StringUtils$;
import org.dbpedia.util.text.ParseExceptionCounter;
import org.dbpedia.util.text.ParseExceptionHandler;
import org.dbpedia.util.text.html.HtmlCoder;
import org.dbpedia.util.text.html.XmlCodes;
import scala.Array$;
import scala.Console$;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;

/**
 * Singleton holder (decompiled Scala {@code object}) for a command-line script that
 * reads text files line by line, passes every line through an {@link HtmlCoder}
 * configured with {@code XmlCodes.NONE}, and writes the result to a sibling file.
 *
 * <p>For each input name {@code X} the script reads {@code dir/X + suffix} and writes
 * {@code dir/X + extension + suffix}. Progress and statistics go to the error console.
 */
public final class DecodeHtmlEntities$ {
    /** The single instance of this module; created when the class is initialized. */
    public static final DecodeHtmlEntities$ MODULE$ = new DecodeHtmlEntities$();

    /**
     * Splits a comma-separated argument into its trimmed, non-empty parts.
     *
     * @param arg comma-separated list, e.g. {@code "a, b,,c"}
     * @return the parts with surrounding whitespace removed and empty entries dropped
     */
    private String[] split(String arg) {
        return (String[])Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.refArrayOps((Object[])arg.split(",")).map((Function1)new Serializable(){

            public final String apply(String x$1) {
                return x$1.trim();
            }
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).filter((Function1)new Serializable(){

            public final boolean apply(String x$2) {
                return new StringOps(Predef$.MODULE$.augmentString(x$2)).nonEmpty();
            }
        });
    }

    /**
     * Script entry point.
     *
     * <p>Expects exactly five arguments:
     * <ol>
     *   <li>directory containing the input files (output files are written there too)</li>
     *   <li>comma-separated names of the input files</li>
     *   <li>name extension inserted between input name and suffix to form the output name</li>
     *   <li>file suffix shared by input and output files</li>
     *   <li>name of the output file encoding, resolved via {@link Charset#forName(String)}</li>
     * </ol>
     *
     * <p>Each {@code Predef.require} call below rejects a missing or empty argument with
     * the message shown. The same coder and error counter are shared by all input files;
     * the counter is reset after each file has been reported.
     *
     * @param args the five command-line arguments described above
     */
    public void main(String[] args) {
        Predef$.MODULE$.require(args != null && args.length == 5, (Function0)new Serializable(){

            public final String apply() {
                return "need five args: directory, comma-separated names of input files (e.g. 'gutenberg'), output dataset name extension (e.g. '_fixed'), file extension (e.g. '_links.nt.gz'), output file encoding";
            }
        });
        File dir = new File(args[0]);
        String[] inputs = this.split(args[1]);
        Predef$.MODULE$.require(Predef$.MODULE$.refArrayOps((Object[])inputs).nonEmpty(), (Function0)new Serializable(){

            public final String apply() {
                return "no input file names";
            }
        });
        String extension = args[2];
        Predef$.MODULE$.require(new StringOps(Predef$.MODULE$.augmentString(extension)).nonEmpty(), (Function0)new Serializable(){

            public final String apply() {
                return "no output name extension";
            }
        });
        String suffix = args[3];
        Predef$.MODULE$.require(new StringOps(Predef$.MODULE$.augmentString(suffix)).nonEmpty(), (Function0)new Serializable(){

            public final String apply() {
                return "no input/output file suffix";
            }
        });
        Charset charset = Charset.forName(args[4]);
        // The counter is installed as the coder's error handler so that the number of
        // errors can be reported (and reset) per input file.
        ParseExceptionCounter counter = new ParseExceptionCounter();
        HtmlCoder coder = new HtmlCoder(XmlCodes.NONE);
        coder.setErrorHandler((ParseExceptionHandler)counter);
        Predef$.MODULE$.refArrayOps((Object[])inputs).foreach((Function1)new Serializable(dir, extension, suffix, charset, counter, coder){
            private final File dir$1;
            private final String extension$1;
            private final String suffix$1;
            private final Charset charset$1;
            private final ParseExceptionCounter counter$1;
            public final HtmlCoder coder$1;

            /**
             * Processes one input file: recodes every line into the output file, then
             * prints line/change statistics and the error count for this file.
             */
            public final void apply(String input) {
                File inFile = new File(this.dir$1, new StringBuilder().append((Object)input).append((Object)this.suffix$1).toString());
                File outFile = new File(this.dir$1, new StringBuilder().append((Object)input).append((Object)this.extension$1).append((Object)this.suffix$1).toString());
                Console$.MODULE$.err().println(new StringBuilder().append((Object)"reading ").append((Object)inFile).append((Object)" ...").toString());
                Console$.MODULE$.err().println(new StringBuilder().append((Object)"writing ").append((Object)outFile).append((Object)" ...").toString());
                // Mutable counters are boxed in IntRef so the per-line closure can update them.
                IntRef lineCount = IntRef.create((int)0);
                IntRef changeCount = IntRef.create((int)0);
                long start = System.nanoTime();
                Writer writer2 = IOUtils$.MODULE$.writer((FileLike)RichFile$.MODULE$.wrapFile(outFile), this.charset$1);
                try {
                    IOUtils$.MODULE$.readLines((FileLike)RichFile$.MODULE$.wrapFile(inFile), IOUtils$.MODULE$.readLines$default$2(), (Function1)new Serializable(this, lineCount, changeCount, start, writer2){
                        private final /* synthetic */ anonfun.main.5 $outer;
                        private final IntRef lineCount$1;
                        private final IntRef changeCount$1;
                        private final long start$1;
                        private final Writer writer$1;

                        /** Recodes a single line and writes it; {@code null} lines are ignored. */
                        public final void apply(String line) {
                            if (line != null) {
                                String decoded = this.$outer.coder$1.code(line);
                                this.writer$1.write(decoded);
                                // 10 is the code of '\n': every output line ends with a line feed.
                                this.writer$1.write(10);
                                // Reference comparison, not equals().
                                // NOTE(review): presumably HtmlCoder.code returns the same
                                // instance when nothing was changed - confirm against HtmlCoder.
                                if (line != decoded) {
                                    ++this.changeCount$1.elem;
                                }
                                ++this.lineCount$1.elem;
                                // Progress report every million lines.
                                if (this.lineCount$1.elem % 1000000 == 0) {
                                    DecodeHtmlEntities$.MODULE$.org$dbpedia$extraction$scripts$DecodeHtmlEntities$$log(this.lineCount$1.elem, this.changeCount$1.elem, this.start$1);
                                }
                            }
                        }
                        {
                            if ($outer == null) {
                                throw null;
                            }
                            this.$outer = $outer;
                            this.lineCount$1 = lineCount$1;
                            this.changeCount$1 = changeCount$1;
                            this.start$1 = start$1;
                            this.writer$1 = writer$1;
                        }
                    });
                }
                finally {
                    // Close the output writer whether or not reading/recoding failed.
                    writer2.close();
                }
                DecodeHtmlEntities$.MODULE$.org$dbpedia$extraction$scripts$DecodeHtmlEntities$$log(lineCount.elem, changeCount.elem, start);
                Console$.MODULE$.err().println(new StringBuilder().append((Object)"found ").append((Object)BoxesRunTime.boxToInteger((int)this.counter$1.errors())).append((Object)" HTML character reference errors").toString());
                // Start the next file with a fresh error count.
                this.counter$1.reset();
            }
            {
                this.dir$1 = dir$1;
                this.extension$1 = extension$1;
                this.suffix$1 = suffix$1;
                this.charset$1 = charset$1;
                this.counter$1 = counter$1;
                this.coder$1 = coder$1;
            }
        });
    }

    /**
     * Prints a progress line to the error console: lines read, lines changed, elapsed
     * time and average microseconds per line.
     *
     * <p>The average is computed with float division, so {@code lines == 0} does not
     * throw; the printed average is then NaN or Infinity.
     *
     * @param lines   number of lines read so far
     * @param changed number of lines counted as changed so far
     * @param start   {@link System#nanoTime()} value taken when processing started
     */
    public void org$dbpedia$extraction$scripts$DecodeHtmlEntities$$log(int lines, int changed, long start) {
        long micros = (System.nanoTime() - start) / 1000L;
        Console$.MODULE$.err().println(new StringBuilder().append((Object)"read ").append((Object)BoxesRunTime.boxToInteger((int)lines)).append((Object)" lines, changed ").append((Object)BoxesRunTime.boxToInteger((int)changed)).append((Object)" lines in ").append((Object)StringUtils$.MODULE$.prettyMillis(micros / 1000L)).append((Object)" (").append((Object)BoxesRunTime.boxToFloat((float)((float)micros / (float)lines))).append((Object)" micros per line)").toString());
    }

    /** Private: the only instance is {@link #MODULE$}. */
    private DecodeHtmlEntities$() {
    }
}

