/*
 * Decompiled with CFR 0.152.
 */
package org.wikibrain.wikidata;

import gnu.trove.map.TIntIntMap;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.io.FileUtils;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.conf.DefaultOptionBuilder;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.cmd.FileMatcher;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.MetaInfoDao;
import org.wikibrain.core.dao.UniversalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageInfo;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.model.RawPage;
import org.wikibrain.download.DumpFileDownloader;
import org.wikibrain.download.RequestedLinkGetter;
import org.wikibrain.parser.DumpSplitter;
import org.wikibrain.parser.WpParseException;
import org.wikibrain.parser.xml.PageXmlParser;
import org.wikibrain.utils.ParallelForEach;
import org.wikibrain.utils.Procedure;
import org.wikibrain.utils.WpThreadUtils;
import org.wikibrain.wikidata.WikidataDao;
import org.wikibrain.wikidata.WikidataEntity;
import org.wikibrain.wikidata.WikidataParser;
import org.wikibrain.wikidata.WikidataStatement;

public class WikidataDumpLoader {
    private static final Logger LOG = Logger.getLogger(WikidataDumpLoader.class.getName());
    private final AtomicInteger counter = new AtomicInteger();
    private final MetaInfoDao metaDao;
    private final WikidataDao wikidataDao;
    private final UniversalPageDao universalPageDao;
    private final LanguageSet languages;
    private final WikidataParser wdParser = new WikidataParser();
    private final TIntSet universalIds;

    public WikidataDumpLoader(WikidataDao wikidataDao, MetaInfoDao metaDao, UniversalPageDao upDao, LanguageSet langs) throws DaoException {
        this.wikidataDao = wikidataDao;
        this.metaDao = metaDao;
        this.languages = langs;
        this.universalPageDao = upDao;
        Map localMaps = this.universalPageDao.getAllLocalToUnivIdsMap(this.languages);
        this.universalIds = new TIntHashSet();
        for (TIntIntMap langMap : localMaps.values()) {
            this.universalIds.addAll(langMap.valueCollection());
        }
    }

    public void load(final File file) {
        DumpSplitter parser = new DumpSplitter(file);
        ParallelForEach.iterate((Iterator)parser.iterator(), (int)WpThreadUtils.getMaxThreads(), (int)1000, (Procedure)new Procedure<String>(){

            public void call(String page) {
                try {
                    WikidataDumpLoader.this.save(file, page);
                    WikidataDumpLoader.this.metaDao.incrementRecords(WikidataEntity.class);
                }
                catch (WpParseException e) {
                    LOG.log(Level.WARNING, "parsing of " + file.getPath() + " failed:", e);
                    WikidataDumpLoader.this.metaDao.incrementErrorsQuietly(WikidataEntity.class);
                }
                catch (DaoException e) {
                    LOG.log(Level.WARNING, "parsing of " + file.getPath() + " failed:", e);
                    WikidataDumpLoader.this.metaDao.incrementErrorsQuietly(WikidataEntity.class);
                }
            }
        }, (int)Integer.MAX_VALUE);
    }

    private void save(File file, String page) throws WpParseException, DaoException {
        PageXmlParser xmlParser;
        RawPage rp;
        if (this.counter.incrementAndGet() % 10000 == 0) {
            LOG.info("processing wikidata entity " + this.counter.get());
        }
        if ((rp = (xmlParser = new PageXmlParser(LanguageInfo.getByLanguage((Language)Language.EN))).parse(page)).getModel().equals("wikibase-item") || rp.getModel().equals("wikibase-property")) {
            WikidataEntity entity = this.wdParser.parse(rp);
            entity.prune(this.languages);
            if (entity.getType() == WikidataEntity.Type.PROPERTY || this.universalIds.contains(entity.getId())) {
                this.wikidataDao.save(entity);
            }
        } else if (!Arrays.asList("wikitext", "css", "javascript").contains(rp.getModel())) {
            LOG.warning("unknown model: " + rp.getModel() + " in page " + rp.getTitle());
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, SQLException, IOException, ConfigurationException, DaoException, WikiBrainException, ParseException, InterruptedException {
        ArrayList<File> paths;
        CommandLine cmd;
        Options options = new Options();
        options.addOption(new DefaultOptionBuilder().withLongOpt("drop-tables").withDescription("drop and recreate all tables").create("d"));
        EnvBuilder.addStandardOptions((Options)options);
        PosixParser parser = new PosixParser();
        try {
            cmd = parser.parse(options, args);
        }
        catch (org.apache.commons.cli.ParseException e) {
            System.err.println("Invalid option usage: " + e.getMessage());
            new HelpFormatter().printHelp("WikidataDumpLoader", options);
            return;
        }
        Env env = new EnvBuilder(cmd).build();
        Configurator conf = env.getConfigurator();
        if (cmd.getArgList().isEmpty()) {
            File dumpFile = File.createTempFile("wikiapidia", "dumplinks");
            dumpFile.deleteOnExit();
            RequestedLinkGetter getter = new RequestedLinkGetter(Language.WIKIDATA, Arrays.asList(FileMatcher.ARTICLES), new Date());
            FileUtils.writeLines((File)dumpFile, (Collection)getter.getLangLinks());
            String filePath = conf.getConf().get().getString("download.path");
            DumpFileDownloader downloader = new DumpFileDownloader(new File(filePath));
            downloader.downloadFrom(dumpFile);
            paths = new ArrayList();
            for (File f : env.getFiles(new LanguageSet(Language.WIKIDATA), new FileMatcher[]{FileMatcher.ARTICLES})) {
                if (!f.getName().contains("wikidata")) continue;
                paths.add(f);
            }
        } else {
            paths = new ArrayList<File>();
            for (Object arg : cmd.getArgList()) {
                paths.add(new File((String)arg));
            }
        }
        WikidataDao wdDao = (WikidataDao)conf.get(WikidataDao.class);
        UniversalPageDao upDao = (UniversalPageDao)conf.get(UniversalPageDao.class);
        MetaInfoDao metaDao = (MetaInfoDao)conf.get(MetaInfoDao.class);
        LanguageSet langs = (LanguageSet)conf.get(LanguageSet.class);
        final WikidataDumpLoader loader = new WikidataDumpLoader(wdDao, metaDao, upDao, langs);
        if (cmd.hasOption("d")) {
            wdDao.clear();
            metaDao.clear(WikidataStatement.class);
        }
        wdDao.beginLoad();
        metaDao.beginLoad();
        ParallelForEach.loop(paths, (Procedure)new Procedure<File>(){

            public void call(File path) throws Exception {
                LOG.info("processing file: " + path);
                loader.load(path);
            }
        });
        wdDao.endLoad();
        metaDao.endLoad();
    }
}

