/*
 * Decompiled with CFR 0.152.
 */
package org.wikibrain.wikidata;

import gnu.trove.map.TIntIntMap;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.conf.DefaultOptionBuilder;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.MetaInfoDao;
import org.wikibrain.core.dao.UniversalPageDao;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.download.FileDownloader;
import org.wikibrain.parser.WpParseException;
import org.wikibrain.utils.ParallelForEach;
import org.wikibrain.utils.Procedure;
import org.wikibrain.utils.WpIOUtils;
import org.wikibrain.utils.WpThreadUtils;
import org.wikibrain.wikidata.WikidataDao;
import org.wikibrain.wikidata.WikidataDumpHelper;
import org.wikibrain.wikidata.WikidataEntity;
import org.wikibrain.wikidata.WikidataParser;
import org.wikibrain.wikidata.WikidataStatement;

public class WikidataDumpLoader {
    private static final Logger LOG = Logger.getLogger(WikidataDumpLoader.class.getName());
    private final AtomicInteger counter = new AtomicInteger();
    private final MetaInfoDao metaDao;
    private final WikidataDao wikidataDao;
    private final UniversalPageDao universalPageDao;
    private final LanguageSet languages;
    private final WikidataParser wdParser = new WikidataParser();
    private final TIntSet universalIds;
    private boolean keepAllLabeledEntities = false;

    public WikidataDumpLoader(WikidataDao wikidataDao, MetaInfoDao metaDao, UniversalPageDao upDao, LanguageSet langs) throws DaoException {
        this.wikidataDao = wikidataDao;
        this.metaDao = metaDao;
        this.languages = langs;
        this.universalPageDao = upDao;
        Map localMaps = this.universalPageDao.getAllUnivToLocalIdsMap(this.languages);
        this.universalIds = new TIntHashSet();
        for (TIntIntMap langMap : localMaps.values()) {
            this.universalIds.addAll(langMap.keys());
        }
    }

    public void load(final File file) throws IOException {
        LineIterator lines = new LineIterator((Reader)WpIOUtils.openBufferedReader((File)file));
        ParallelForEach.iterate((Iterator)lines, (int)WpThreadUtils.getMaxThreads(), (int)1000, (Procedure)new Procedure<String>(){

            public void call(String page) {
                try {
                    WikidataDumpLoader.this.save(file, page);
                    WikidataDumpLoader.this.metaDao.incrementRecords(WikidataEntity.class);
                }
                catch (WpParseException e) {
                    LOG.log(Level.WARNING, "parsing of " + file.getPath() + " failed:", e);
                    WikidataDumpLoader.this.metaDao.incrementErrorsQuietly(WikidataEntity.class);
                }
                catch (DaoException e) {
                    LOG.log(Level.WARNING, "parsing of " + file.getPath() + " failed:", e);
                    WikidataDumpLoader.this.metaDao.incrementErrorsQuietly(WikidataEntity.class);
                }
            }
        }, (int)Integer.MAX_VALUE);
        lines.close();
    }

    private void save(File file, String json) throws WpParseException, DaoException {
        if (!json.contains("{")) {
            return;
        }
        if ((json = json.trim()).endsWith(",")) {
            json = json.substring(0, json.length() - 1);
        }
        if (this.counter.incrementAndGet() % 100000 == 0) {
            LOG.info("processing wikidata entity " + this.counter.get());
        }
        WikidataEntity entity = this.wdParser.parse(json);
        entity.prune(this.languages);
        if (this.keepEntity(entity)) {
            this.wikidataDao.save(entity);
        }
    }

    private boolean keepEntity(WikidataEntity entity) {
        if (entity.getType() == WikidataEntity.Type.PROPERTY) {
            return true;
        }
        if (this.universalIds.contains(entity.getId())) {
            return true;
        }
        return this.keepAllLabeledEntities && !entity.getLabels().isEmpty();
    }

    public void setKeepAllLabeledEntities(boolean keepAllLabeledEntities) {
        this.keepAllLabeledEntities = keepAllLabeledEntities;
    }

    public static void main(String[] args) throws ClassNotFoundException, SQLException, IOException, ConfigurationException, DaoException, WikiBrainException, ParseException, InterruptedException {
        File path;
        CommandLine cmd;
        Options options = new Options();
        options.addOption(new DefaultOptionBuilder().withLongOpt("drop-tables").withDescription("drop and recreate all tables").create("d"));
        options.addOption(new DefaultOptionBuilder().withLongOpt("keep-labeled").withDescription("keep all labeled entities").create("k"));
        EnvBuilder.addStandardOptions((Options)options);
        PosixParser parser = new PosixParser();
        try {
            cmd = parser.parse(options, args);
        }
        catch (org.apache.commons.cli.ParseException e) {
            System.err.println("Invalid option usage: " + e.getMessage());
            new HelpFormatter().printHelp("WikidataDumpLoader", options);
            return;
        }
        Env env = new EnvBuilder(cmd).build();
        Configurator conf = env.getConfigurator();
        if (cmd.getArgList().isEmpty()) {
            WikidataDumpHelper helper = new WikidataDumpHelper();
            String downloadDir = conf.getConf().get().getString("download.path");
            File dest = FileUtils.getFile((String[])new String[]{downloadDir, helper.getMostRecentFile()});
            if (!dest.isFile()) {
                dest.getParentFile().mkdirs();
                File tmp = File.createTempFile("wikibrain-wikidata", "json");
                FileUtils.deleteQuietly((File)tmp);
                URL url = new URL(helper.getMostRecentUrl());
                FileDownloader downloader = new FileDownloader();
                downloader.download(url, tmp);
                if (dest.isFile()) {
                    throw new IllegalStateException();
                }
                FileUtils.moveFile((File)tmp, (File)dest);
            }
            path = dest;
        } else if (cmd.getArgList().size() == 1) {
            path = new File(cmd.getArgList().get(0).toString());
        } else {
            System.err.println("Invalid option usage:");
            new HelpFormatter().printHelp("WikidataDumpLoader", options);
            return;
        }
        WikidataDao wdDao = (WikidataDao)conf.get(WikidataDao.class);
        UniversalPageDao upDao = (UniversalPageDao)conf.get(UniversalPageDao.class);
        MetaInfoDao metaDao = (MetaInfoDao)conf.get(MetaInfoDao.class);
        LanguageSet langs = (LanguageSet)conf.get(LanguageSet.class);
        WikidataDumpLoader loader = new WikidataDumpLoader(wdDao, metaDao, upDao, langs);
        if (cmd.hasOption("d")) {
            wdDao.clear();
            metaDao.clear(WikidataStatement.class);
        }
        if (cmd.hasOption("k")) {
            loader.setKeepAllLabeledEntities(true);
        }
        wdDao.beginLoad();
        metaDao.beginLoad();
        loader.load(path);
        LOG.info("building indexes");
        wdDao.endLoad();
        metaDao.endLoad();
        LOG.info("finished");
    }
}

