/*
 * Decompiled with CFR 0.152.
 */
package org.lockss.laaws.crawler.impl.pluggable;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.lockss.config.AuConfiguration;
import org.lockss.config.ConfigManager;
import org.lockss.db.DbException;
import org.lockss.laaws.crawler.impl.ApiUtils;
import org.lockss.laaws.crawler.impl.PluggableCrawlManager;
import org.lockss.laaws.crawler.impl.pluggable.CmdLineCrawl;
import org.lockss.laaws.crawler.impl.pluggable.PluggableCrawl;
import org.lockss.laaws.crawler.impl.pluggable.PluggableCrawler;
import org.lockss.laaws.crawler.model.CrawlerConfig;
import org.lockss.laaws.crawler.utils.ExecutorUtils;
import org.lockss.log.L4JLogger;
import org.lockss.plugin.ArchivalUnit;
import org.lockss.plugin.CachedUrl;
import org.lockss.util.ClassUtil;
import org.lockss.util.ListUtil;
import org.lockss.util.StringUtil;
import org.lockss.util.rest.crawler.CrawlDesc;
import org.lockss.util.rest.crawler.CrawlJob;
import org.lockss.util.rest.crawler.JobStatus;
import org.lockss.util.rest.repo.LockssRepository;

/**
 * A {@link PluggableCrawler} that runs crawls by queueing {@link CmdLineCrawl} jobs on a
 * thread-pool executor and storing the WARC files they produce in a {@link LockssRepository}.
 *
 * <p>Behaviour is driven by the attribute map of the {@link CrawlerConfig} handed to
 * {@link #updateCrawlerConfig(CrawlerConfig)}; the {@code ATTR_*} constants name the recognised
 * attributes and the {@code DEFAULT_*} constants their fallbacks.
 *
 * <p>NOTE(review): {@link #crawlMap} is a plain {@link HashMap} with no synchronization in this
 * class; confirm callers serialize access if crawls are requested from several threads.
 */
public class CmdLineCrawler implements PluggableCrawler {
    public static final String PREFIX = "org.lockss.crawlerservice.";
    private static final L4JLogger log = L4JLogger.getLogger();

    /** Attribute holding the executor specification passed to {@link #initCrawlScheduler(String)}. */
    public static final String ATTR_CRAWL_EXECUTOR_SPEC = "executor.spec";
    public static final String DEFAULT_CMDLINE_CRAWL_EXECUTOR_SPEC = "10;2";
    /** Attribute holding the status pattern handed to the repository when adding artifacts. */
    public static final String ATTR_EXCLUDE_STATUS_PATTERN = "excludeStatusPattern";
    public static final String DEFAULT_EXCLUDE_STATUS_PATTERN = "(4|5)..";
    public static final String ATTR_OUTPUT_LOG_LEVEL = "outputLogLevel";
    public static final String DEFAULT_OUTPUT_LOG_LEVEL = "INFO";
    public static final String ATTR_ERROR_LOG_LEVEL = "errorLogLevel";
    public static final String DEFAULT_ERROR_LOG_LEVEL = "ERROR";
    public static final String ATTR_JOIN_OUTPUT_STREAMS = "joinOutputStreams";
    public static final String DEFAULT_JOIN_OUTPUT_STREAMS = "true";
    /** Attribute holding a time-interval string parsed into {@link #procExitWait}. */
    public static final String ATTR_PROC_EXIT_WAIT = "procExitWait";
    public static final long DEFAULT_PROC_EXIT_WAIT = 600000L;
    public static final String ATTR_COMPRESS_WARC = "preferCompressedWarcs";
    public static final String DEFAULT_COMPRESS_WARC = "false";
    public static final String ATTR_COMPRESSED_WARC_FILE_EXTENSION = "compressedWarcExt";
    public static final String DEFAULT_COMPRESSED_WARC_FILE_EXTENSION = ".warc.gz";
    public static final String ATTR_UNCOMPRESSED_WARC_FILE_EXTENSION = "uncompressedWarcExt";
    public static final String DEFAULT_UNCOMPRESSED_WARC_FILE_EXTENSION = ".warc";
    /** Attribute holding a {@code ;}-separated list of parameter names. */
    public static final String ATTR_UNSUPPORTED_PARAMS = "unsupportedParams";
    /** AU configuration key under which start URLs are accumulated. */
    public static final String START_URL_KEY = "start_urls";
    /** AU configuration key under which URL stems are accumulated. */
    public static final String URL_STEMS_KEY = "url_stems";

    protected CrawlerConfig config;
    protected String outputLogLevel;
    protected String errorLogLevel;
    protected String excludeStatusPattern;
    protected boolean compressWarc;
    protected List<String> warcFileFilter;
    protected long procExitWait;
    protected List<String> unsupportedParams;
    /** Crawls known to this crawler, keyed by job id. */
    protected HashMap<String, CmdLineCrawl> crawlMap = new HashMap<>();
    protected CommandLineBuilder cmdLineBuilder;
    protected PluggableCrawlManager pcManager;
    private LockssRepository v2Repo;
    private ThreadPoolExecutor crawlQueueExecutor;
    private String namespace;
    private boolean joinOutputStreams;
    private String compressedWarcExtension;
    private String uncompressedWarcExtension;

    /**
     * Sets the crawl manager used for eligibility checks and AU configuration access.
     *
     * @param pcManager the crawl manager
     * @return this crawler, for chaining
     */
    public CmdLineCrawler setCrawlManager(PluggableCrawlManager pcManager) {
        this.pcManager = pcManager;
        return this;
    }

    /**
     * Sets the repository that WARC files are stored in.
     *
     * @param v2Repo the repository
     * @return this crawler, for chaining
     */
    public CmdLineCrawler setV2Repo(LockssRepository v2Repo) {
        this.v2Repo = v2Repo;
        return this;
    }

    /**
     * Sets the repository namespace used when storing artifacts.
     *
     * @param namespace the namespace
     * @return this crawler, for chaining
     */
    public CmdLineCrawler setNamespace(String namespace) {
        this.namespace = namespace;
        return this;
    }

    /**
     * Applies a crawler configuration by delegating to {@link #updateCrawlerConfig(CrawlerConfig)}.
     *
     * @param config the configuration to apply
     * @return this crawler, for chaining
     */
    public CmdLineCrawler setConfig(CrawlerConfig config) {
        updateCrawlerConfig(config);
        return this;
    }

    /**
     * Sets the builder used to create crawl command lines.
     *
     * @param cmdLineBuilder the builder
     * @return this crawler, for chaining
     */
    public CmdLineCrawler setCmdLineBuilder(CommandLineBuilder cmdLineBuilder) {
        this.cmdLineBuilder = cmdLineBuilder;
        return this;
    }

    /** @return the configuration most recently applied, or {@code null} if none was applied */
    public CrawlerConfig getConfig() {
        return config;
    }

    /** @return the command line builder, or {@code null} if none has been set */
    protected CommandLineBuilder getCmdLineBuilder() {
        return cmdLineBuilder;
    }

    /** @return the crawler id reported by the current configuration */
    @Override
    public String getCrawlerId() {
        return config.getCrawlerId();
    }

    /**
     * Replaces the current configuration and re-derives every setting from its attribute map:
     * the command line builder (when a {@code cmdLineBuilder} class name is present), the crawl
     * executor, log levels, WARC preferences, unsupported parameters and the process exit wait.
     *
     * <p>A builder class that cannot be instantiated, or an unparsable exit wait, is logged and
     * otherwise ignored; the previous builder / the default wait stays in effect.
     *
     * @param crawlerConfig the new configuration
     */
    @Override
    public void updateCrawlerConfig(CrawlerConfig crawlerConfig) {
        this.config = crawlerConfig;
        Map<String, String> attr = crawlerConfig.getAttributes();
        // Only used to give log messages some context.
        String crawlerId = attr.get("crawlerId");

        String builderClassName = attr.get("cmdLineBuilder");
        if (builderClassName != null) {
            try {
                log.debug2("Instantiating " + builderClassName);
                Class<?> builderClass = Class.forName(builderClassName);
                CommandLineBuilder clb =
                    (CommandLineBuilder) ClassUtil.instantiate(builderClassName, builderClass);
                setCmdLineBuilder(clb);
            } catch (Exception ex) {
                // Pass the exception along so the cause of the failure is not lost.
                log.error("Unable to instantiate CommandLineBuilder: {} for Crawler {} ",
                    builderClassName, crawlerId, ex);
            }
        }

        // NOTE(review): this inline fallback ("100;2") differs from
        // DEFAULT_CMDLINE_CRAWL_EXECUTOR_SPEC ("10;2"); kept as found - confirm which is intended.
        String qspec = attr.getOrDefault(ATTR_CRAWL_EXECUTOR_SPEC, "100;2");
        initCrawlScheduler(qspec);

        excludeStatusPattern =
            attr.getOrDefault(ATTR_EXCLUDE_STATUS_PATTERN, DEFAULT_EXCLUDE_STATUS_PATTERN);
        outputLogLevel = attr.getOrDefault(ATTR_OUTPUT_LOG_LEVEL, DEFAULT_OUTPUT_LOG_LEVEL);
        errorLogLevel = attr.getOrDefault(ATTR_ERROR_LOG_LEVEL, DEFAULT_ERROR_LOG_LEVEL);
        joinOutputStreams = Boolean.parseBoolean(
            attr.getOrDefault(ATTR_JOIN_OUTPUT_STREAMS, DEFAULT_JOIN_OUTPUT_STREAMS));
        compressWarc = Boolean.parseBoolean(
            attr.getOrDefault(ATTR_COMPRESS_WARC, DEFAULT_COMPRESS_WARC));
        compressedWarcExtension = attr.getOrDefault(
            ATTR_COMPRESSED_WARC_FILE_EXTENSION, DEFAULT_COMPRESSED_WARC_FILE_EXTENSION);
        uncompressedWarcExtension = attr.getOrDefault(
            ATTR_UNCOMPRESSED_WARC_FILE_EXTENSION, DEFAULT_UNCOMPRESSED_WARC_FILE_EXTENSION);
        warcFileFilter =
            ListUtil.list("*" + compressedWarcExtension, "*" + uncompressedWarcExtension);

        String unsupported = attr.getOrDefault(ATTR_UNSUPPORTED_PARAMS, "");
        if (StringUtil.isNullString(unsupported)) {
            unsupportedParams = Collections.emptyList();
        } else {
            unsupportedParams = Stream.of(unsupported.split(";"))
                .map(String::trim)
                .collect(Collectors.toList());
        }

        procExitWait = DEFAULT_PROC_EXIT_WAIT;
        String procWaitStr = attr.get(ATTR_PROC_EXIT_WAIT);
        if (!StringUtil.isNullString(procWaitStr)) {
            try {
                procExitWait = StringUtil.parseTimeInterval(procWaitStr);
            } catch (NumberFormatException nfe) {
                log.error("The value of the param {} for {} is invalid: using default.",
                    ATTR_PROC_EXIT_WAIT, crawlerId);
            }
        }
    }

    /** @return the configuration most recently applied, or {@code null} if none was applied */
    @Override
    public CrawlerConfig getCrawlerConfig() {
        return config;
    }

    /** @return the configured process exit wait ({@link #DEFAULT_PROC_EXIT_WAIT} unless overridden) */
    public long getProcExitWait() {
        return procExitWait;
    }

    /** @return wildcard patterns ({@code *<ext>}) for the compressed and uncompressed WARC extensions */
    public List<String> getWarcFileFilter() {
        return warcFileFilter;
    }

    /** @return the configured file extension for compressed WARC files */
    public String getCompressedWarcExtension() {
        return compressedWarcExtension;
    }

    /** @return the configured file extension for uncompressed WARC files */
    public String getUncompressedWarcExtension() {
        return uncompressedWarcExtension;
    }

    /** @return the trimmed entries of the {@code unsupportedParams} attribute; empty if unset */
    public List<String> getUnsupportedParams() {
        return unsupportedParams;
    }

    /** @return the value of the {@code preferCompressedWarcs} attribute */
    public boolean useCompressWarc() {
        return compressWarc;
    }

    /**
     * Registers and queues a crawl for the given job.
     *
     * @param au the archival unit to crawl
     * @param crawlJob the job describing the crawl
     * @return the queued crawl, or {@code null} when the AU is not eligible for crawling
     */
    @Override
    public PluggableCrawl requestCrawl(ArchivalUnit au, CrawlJob crawlJob) {
        if (!isElgibleForCrawl(crawlJob.getCrawlDesc().getAuId())) {
            log.warn("Crawl request {} ignored! au is not eligible for crawl.", crawlJob);
            return null;
        }
        CmdLineCrawl clCrawl = new CmdLineCrawl(this, au, crawlJob);
        crawlMap.put(crawlJob.getJobId(), clCrawl);
        clCrawl.runnableJob = new RunnableCrawlJob(crawlJob, clCrawl);

        // Mark the job as queued BEFORE handing it to the executor: once submitted it may start
        // on another thread at any moment, and a later write here could clobber its status.
        JobStatus status = crawlJob.getJobStatus();
        status.setStatusCode(JobStatus.StatusCodeEnum.QUEUED);
        status.setMsg("Pending.");

        crawlQueueExecutor.submit(clCrawl.runnableJob);
        return clCrawl;
    }

    /**
     * Checks whether a new crawl of the given AU may be requested. When a known non-repair crawl
     * for the same AU exists, the decision is delegated to the crawl manager; otherwise the AU is
     * considered eligible.
     *
     * @param auId the AU identifier
     * @return {@code true} if a crawl may be requested
     */
    public boolean isElgibleForCrawl(String auId) {
        for (CmdLineCrawl crawl : crawlMap.values()) {
            if (crawl.getAuId().equals(auId) && !crawl.isRepairCrawl) {
                return pcManager.isEligibleForCrawl(auId);
            }
        }
        return true;
    }

    /**
     * Forgets the crawl with the given id, removes its job from the executor and stops it.
     *
     * @param crawlId the job id of the crawl
     * @return the stopped crawl, or {@code null} if no crawl with that id is known
     */
    @Override
    public PluggableCrawl stopCrawl(String crawlId) {
        CmdLineCrawl clCrawl = crawlMap.remove(crawlId);
        if (clCrawl != null) {
            // NOTE(review): the job was handed over via submit(); ThreadPoolExecutor.remove may
            // not find tasks that were wrapped on submission - confirm the executor built by
            // ExecutorUtils accounts for this.
            crawlQueueExecutor.remove(clCrawl.runnableJob);
            clCrawl.stopCrawl();
        }
        return clCrawl;
    }

    /**
     * @param crawlId the job id of the crawl
     * @return the crawl with that id, or {@code null} if unknown
     */
    @Override
    public PluggableCrawl getCrawl(String crawlId) {
        return crawlMap.get(crawlId);
    }

    /** Stops every known crawl and then shuts the executor down immediately. */
    @Override
    public void deleteAllCrawls() {
        // stopCrawl() removes entries from crawlMap, so iterate over a snapshot of the keys to
        // avoid a ConcurrentModificationException from the live key-set view.
        for (String key : new ArrayList<>(crawlMap.keySet())) {
            stopCrawl(key);
        }
        crawlQueueExecutor.shutdownNow();
    }

    /** @return the boolean value of the {@code <crawlerId>Enabled} attribute; {@code false} if absent */
    @Override
    public boolean isCrawlerEnabled() {
        Map<String, String> attrs = config.getAttributes();
        return Boolean.parseBoolean(attrs.get(config.getCrawlerId() + "Enabled"));
    }

    /** Shuts the crawl executor down, waiting for running jobs as {@link #shutdownWithWait} does. */
    @Override
    public void shutdown() {
        shutdownWithWait(crawlQueueExecutor);
    }

    /**
     * Shuts an executor down in two stages: an orderly shutdown with up to 60 seconds of grace,
     * then a forced shutdown with another 60 seconds before giving up and logging an error.
     * If the calling thread is interrupted, the executor is shut down immediately and the
     * interrupt flag is restored.
     *
     * @param scheduler the executor to shut down
     */
    protected void shutdownWithWait(ExecutorService scheduler) {
        scheduler.shutdown();
        try {
            if (scheduler.awaitTermination(60L, TimeUnit.SECONDS)) {
                return;
            }
            scheduler.shutdownNow();
            if (!scheduler.awaitTermination(60L, TimeUnit.SECONDS)) {
                log.error("Pool did not terminate");
            }
        } catch (InterruptedException ie) {
            scheduler.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Stops the crawl executor from accepting new jobs.
     *
     * @param abortCrawling if {@code true} the executor is shut down immediately via
     *     {@code shutdownNow()}; otherwise an orderly {@code shutdown()} is requested
     */
    @Override
    public void disable(boolean abortCrawling) {
        if (!abortCrawling) {
            crawlQueueExecutor.shutdown();
            return;
        }
        List<Runnable> runnables = crawlQueueExecutor.shutdownNow();
        if (log.isDebug2Enabled()) {
            log.debug2("successfullly aborted {}", runnables);
        }
    }

    /** @param pluggableCrawlManager the crawl manager to use from now on */
    @Override
    public void setPluggableCrawlManager(PluggableCrawlManager pluggableCrawlManager) {
        this.pcManager = pluggableCrawlManager;
    }

    /** @return the crawl manager, or {@code null} if none has been set */
    public PluggableCrawlManager getPluggableCrawlManager() {
        return pcManager;
    }

    /**
     * Streams a WARC file into the repository as artifacts of the given AU.
     *
     * @param auId the AU the artifacts belong to
     * @param warcFile the WARC file to read
     * @param isCompressed not consulted by this method
     * @throws IOException if the repository is unavailable or the file cannot be read or stored
     */
    public void storeInRepository(String auId, File warcFile, boolean isCompressed) throws IOException {
        // Verify the repository first so the file is not opened when nothing can be stored.
        ensureRepo();
        try (BufferedInputStream bis = new BufferedInputStream(Files.newInputStream(warcFile.toPath()))) {
            log.debug2("Calling Repository with warc for auid {}", auId);
            v2Repo.addArtifacts(namespace, auId, bis, LockssRepository.ArchiveType.WARC, false, excludeStatusPattern);
        }
        log.debug2("Returned from call to repo");
    }

    /**
     * Merges crawl results into the stored configuration of an AU: start URLs (skipped for
     * repair crawls) and URL stems are appended to the existing {@code ;}-separated values.
     *
     * @param au the archival unit whose configuration is updated
     * @param isRepairCrawl {@code true} to leave the start URLs untouched
     * @param reqUrls the requested URLs, used as start URL candidates
     * @param crawlStems the URL stems seen by the crawl
     * @throws IOException if the configuration cannot be read or stored
     */
    public void updateAuConfig(ArchivalUnit au, boolean isRepairCrawl, List<String> reqUrls, List<String> crawlStems) throws IOException {
        log.debug("updating config for {}", au.getName());
        ConfigManager cm = pcManager.getConfigManager();
        try {
            AuConfiguration auConfig = cm.retrieveArchivalUnitConfiguration(au.getAuId());
            if (!isRepairCrawl) {
                log.debug2("Updating AuConfig for start urls.");
                updateAuConfigItem(auConfig, START_URL_KEY, getCheckedStartUrls(au, reqUrls));
            }
            // Log the stems actually being stored (previously the requested URLs were logged).
            log.debug2("Updating AuConfig for url stems: {}", crawlStems);
            updateAuConfigItem(auConfig, URL_STEMS_KEY, crawlStems);
            cm.storeArchivalUnitConfiguration(auConfig, true);
        } catch (DbException dbe) {
            throw new IOException("Unable update AU configuration", dbe);
        }
    }

    /**
     * Runs every URL through {@link #checkStartUrl(ArchivalUnit, String)}.
     *
     * @param au the archival unit the URLs belong to
     * @param inUrls the candidate URLs; may be {@code null} or empty
     * @return a new list with the checked URLs, empty when there were no candidates
     */
    List<String> getCheckedStartUrls(ArchivalUnit au, List<String> inUrls) {
        List<String> outUrls = new ArrayList<>();
        if (inUrls == null) {
            return outUrls;
        }
        for (String url : inUrls) {
            outUrls.add(checkStartUrl(au, url));
        }
        return outUrls;
    }

    /**
     * Picks the form of a start URL that has content in the AU. If the URL has no trailing slash
     * and no content, but the same URL with a trailing slash does, the slash form is returned;
     * in every other case the URL is returned unchanged.
     *
     * @param au the archival unit to look the URL up in
     * @param startUrl the URL to check
     * @return {@code startUrl}, or {@code startUrl + "/"} when only that form has content
     */
    String checkStartUrl(ArchivalUnit au, String startUrl) {
        if (startUrl.endsWith("/")) {
            return startUrl;
        }
        CachedUrl asGiven = au.makeCachedUrl(startUrl);
        if (asGiven.hasContent()) {
            return startUrl;
        }
        String withSlash = startUrl + "/";
        CachedUrl slashed = au.makeCachedUrl(withSlash);
        return slashed.hasContent() ? withSlash : startUrl;
    }

    /**
     * Appends the elements of {@code updateList} that are not yet present to the
     * {@code ;}-separated value stored under {@code key}, keeping the existing order.
     *
     * @param auConfig the AU configuration to modify
     * @param key the configuration key holding the {@code ;}-separated list
     * @param updateList the elements to add; {@code null} is treated as empty
     */
    void updateAuConfigItem(AuConfiguration auConfig, String key, List<String> updateList) {
        Map<String, String> configMap = auConfig.getAuConfig();
        String currentValue = configMap.get(key);
        List<String> configList = new ArrayList<>();
        // An empty stored value must not contribute an empty element, which would otherwise
        // surface as a leading ';' in the joined result.
        if (currentValue != null && !currentValue.isEmpty()) {
            configList.addAll(Arrays.asList(currentValue.split(";")));
            log.debug2("Current config string: {} a list of {} elements.", currentValue, configList.size());
        }
        if (updateList != null) {
            for (String elem : updateList) {
                if (!configList.contains(elem)) {
                    configList.add(elem);
                }
            }
        }
        auConfig.putAuConfigItem(key, String.join(";", configList));
    }

    /**
     * Creates the crawl executor, or reconfigures the existing one, from an executor spec.
     *
     * @param reqSpec the requested executor specification
     */
    protected void initCrawlScheduler(String reqSpec) {
        crawlQueueExecutor = ExecutorUtils.createOrReConfigureExecutor(
            crawlQueueExecutor, reqSpec, DEFAULT_CMDLINE_CRAWL_EXECUTOR_SPEC);
    }

    /**
     * @param exitCode the exit code of the crawl process
     * @return {@code true} if the exit code is zero
     */
    protected boolean didCrawlSucceed(int exitCode) {
        return exitCode == 0;
    }

    /** @return the configured {@code outputLogLevel} attribute value */
    public String getOutputLogLevel() {
        return outputLogLevel;
    }

    /** @return the configured {@code errorLogLevel} attribute value */
    public String getErrorLogLevel() {
        return errorLogLevel;
    }

    /** @return the value of the {@code joinOutputStreams} attribute */
    public boolean isJoinOutputStreams() {
        return joinOutputStreams;
    }

    /**
     * Makes sure a ready repository is available, looking the repository and namespace up
     * through {@link ApiUtils} when none has been set.
     *
     * @throws IOException if no repository is available or it reports that it is not ready
     */
    private void ensureRepo() throws IOException {
        if (v2Repo == null) {
            v2Repo = ApiUtils.getV2Repo();
            namespace = ApiUtils.getV2Namespace();
        }
        if (v2Repo == null || !v2Repo.isReady()) {
            log.error("Unable to store warc artifacts - Repository is not ready for connections.");
            throw new IOException("Unable to store warc artifacts - Repository is not ready for connections.");
        }
    }

    /** Replaces the crawl executor; package-private setter. */
    void setCrawlQueueExecutor(ThreadPoolExecutor executor) {
        this.crawlQueueExecutor = executor;
    }

    /** Strategy for turning a crawl description into the command line to execute. */
    public static interface CommandLineBuilder {
        /**
         * Builds the command line for a crawl.
         *
         * @param crawlDesc the description of the requested crawl
         * @param file a file made available to the builder (presumably a working directory or
         *     output location; confirm against implementations)
         * @return the command and its arguments
         * @throws IOException if the command line cannot be built
         */
        public List<String> buildCommandLine(CrawlDesc crawlDesc, File file) throws IOException;
    }

    /**
     * Executor task wrapping a {@link CmdLineCrawl}. Its natural ordering puts jobs with a
     * higher priority value first, and among equal priorities the earlier request date first.
     */
    public static class RunnableCrawlJob implements Runnable, Comparable<RunnableCrawlJob> {
        private final CmdLineCrawl cmdLineCrawl;
        public final CrawlJob crawlJob;

        /**
         * @param crawlJob the job supplying priority and request date
         * @param cmdLineCrawl the crawl whose runnable is executed
         */
        public RunnableCrawlJob(CrawlJob crawlJob, CmdLineCrawl cmdLineCrawl) {
            this.crawlJob = crawlJob;
            this.cmdLineCrawl = cmdLineCrawl;
        }

        /** @return the priority from the job's crawl description */
        public int getPriority() {
            return crawlJob.getCrawlDesc().getPriority();
        }

        /** @return the request date of the job */
        public long getRequestDate() {
            return crawlJob.getRequestDate();
        }

        /**
         * Orders by descending priority, then by ascending request date.
         *
         * @param other the job to compare against
         * @return a negative value if this job sorts before {@code other}, a positive value if
         *     it sorts after, or zero if priority and request date are both equal
         */
        @Override
        public int compareTo(RunnableCrawlJob other) {
            // Arguments are swapped on purpose: a larger priority value must sort first.
            int byPriority = Integer.compare(other.getPriority(), getPriority());
            if (byPriority != 0) {
                return byPriority;
            }
            return Long.compare(getRequestDate(), other.getRequestDate());
        }

        /** Runs the runnable supplied by the wrapped crawl. */
        @Override
        public void run() {
            cmdLineCrawl.getRunnable().run();
        }
    }
}

