/*
 * Decompiled with CFR 0.152.
 */
package org.lockss.laaws.crawler.impl.pluggable;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.logging.log4j.Level;
import org.lockss.config.ConfigManager;
import org.lockss.config.Configuration;
import org.lockss.crawler.CrawlerStatus;
import org.lockss.daemon.LockssRunnable;
import org.lockss.laaws.crawler.impl.ApiUtils;
import org.lockss.laaws.crawler.impl.pluggable.CmdLineCrawler;
import org.lockss.laaws.crawler.impl.pluggable.PluggableCrawl;
import org.lockss.log.L4JLogger;
import org.lockss.plugin.ArchivalUnit;
import org.lockss.plugin.AuUtil;
import org.lockss.state.AuState;
import org.lockss.util.MimeUtil;
import org.lockss.util.StringUtil;
import org.lockss.util.UrlUtil;
import org.lockss.util.io.FileUtil;
import org.lockss.util.rest.crawler.CrawlDesc;
import org.lockss.util.rest.crawler.CrawlJob;
import org.lockss.util.rest.crawler.JobStatus;
import org.lockss.util.time.Deadline;

public class CmdLineCrawl
extends PluggableCrawl {
    private static final L4JLogger log = L4JLogger.getLogger();
    // Crawler that owns this crawl; supplies the command-line builder, log levels and repository access.
    protected CmdLineCrawler crawler;
    // Name given to the LockssRunnable created by getRunnable(); built in the constructor.
    protected String threadName;
    // Command line of the external process; null until startCrawl() has built it.
    protected List<String> command = null;
    // Working directory of the external process; null until startCrawl() has created it.
    protected File tmpDir = null;
    // Names of the log levels at which the process's stdout / stderr lines are re-logged.
    protected String outputLogLevel;
    protected String errorLogLevel;
    // All four patterns are compiled with flag 2 (Pattern.CASE_INSENSITIVE).
    // Full-line match for a "fetched" log line: a date, a time with optional fraction,
    // " URL:", then later a "]" followed by " -> ".
    protected static Pattern successPattern = Pattern.compile("[0-9]{4}-[0-9]{2}-[0-9]{2}.*[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]{1,3})? URL:.* .*[^]]*] -> .*[^\"]*", 2);
    // Full-line match for any line containing the word ERROR.
    protected static Pattern errorPattern = Pattern.compile(".*\\bERROR\\b.*", 2);
    // Finds http/https/ftp/gopher/telnet/file URLs inside a line.
    // NOTE(review): inside the character class "\\+-=" is parsed as a range from '+' to '=',
    // which also admits ',' and '<' -- confirm whether that is intended.
    protected static Pattern urlPattern = Pattern.compile("((https?|ftp|gopher|telnet|file):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?\\+-=\\\\\\.&]*)", 2);
    // Finds a "[digits/digits]" counter; extractBytes() returns the number before the slash.
    protected static Pattern bytesPattern = Pattern.compile("\\[[0-9]+/[0-9]+]", 2);
    // Marker parseLine() searches for to locate where the error text starts.
    private static final String ERROR_STR = " ERROR ";
    // URL prefixes collected by parseLine() from lines that match successPattern.
    List<String> stems = new ArrayList<String>();
    // Crawl list taken from the crawl descriptor in the constructor.
    List<String> reqUrls;
    // AU state; fetched lazily by getAuState() or set when the runnable starts.
    AuState auState;
    // True when the crawl descriptor's kind is REPAIR.
    boolean isRepairCrawl;
    // Not referenced by any code visible in this class.
    CmdLineCrawler.RunnableCrawlJob runnableJob;
    // Runnable created by getRunnable(); reset to null when its run ends or stopCrawl() has waited for it.
    LockssRunnable lockssRunnable;
    // External process started by the runnable; stopCrawl() destroys it.
    Process crawlProcess;

    /**
     * Creates a crawl that is carried out by an external command-line process.
     *
     * @param crawler  the crawler that owns this crawl; its configuration and log levels are used
     * @param au       the archival unit being crawled
     * @param crawlJob the job describing the requested crawl
     */
    public CmdLineCrawl(CmdLineCrawler crawler, ArchivalUnit au, CrawlJob crawlJob) {
        super(crawler.getCrawlerConfig(), au, crawlJob);
        this.crawler = crawler;
        String jobId = crawlJob.getJobId();
        // Thread name carries at most the first six characters of the job id. The bound is the
        // full length (not length - 1) so short ids are kept whole and an empty id cannot throw.
        String shortId = jobId == null ? "" : jobId.substring(0, Math.min(6, jobId.length()));
        this.threadName = this.crawlDesc.getCrawlKind() + ":" + this.crawlDesc.getCrawlerId() + ":" + shortId;
        this.outputLogLevel = crawler.getOutputLogLevel();
        this.errorLogLevel = crawler.getErrorLogLevel();
        this.isRepairCrawl = crawlJob.getCrawlDesc().getCrawlKind() == CrawlDesc.CrawlKindEnum.REPAIR;
        this.reqUrls = this.crawlDesc.getCrawlList();
    }

    /**
     * Marks the job active, creates the temporary working directory and builds the command line.
     *
     * <p>If an {@link IOException} is raised during setup, the job is marked as ERROR with an
     * explanatory message; {@code command} is then left unset for the caller to detect.
     *
     * @return the crawler status object of this crawl
     */
    @Override
    public CrawlerStatus startCrawl() {
        CrawlerStatus cs = this.getCrawlerStatus();
        JobStatus js = this.getJobStatus();
        try {
            js.setStatusCode(JobStatus.StatusCodeEnum.ACTIVE);
            js.setMsg("Active.");
            this.tmpDir = FileUtil.createTempDir((String)this.crawlDesc.getCrawlerId(), (String)"");
            this.command = this.crawler.getCmdLineBuilder().buildCommandLine(this.getCrawlDesc(), this.tmpDir);
        }
        catch (IOException ioe) {
            log.error("Unable to create output directory for crawl:", (Throwable)ioe);
            js.setStatusCode(JobStatus.StatusCodeEnum.ERROR);
            // Replace the "Active." message set above so status and message agree.
            js.setMsg("Crawl setup failed: " + ioe.getMessage());
        }
        return cs;
    }

    /**
     * Aborts this crawl if its job is still ACTIVE or QUEUED; otherwise does nothing.
     *
     * <p>When a runnable exists, the external process is destroyed, the crawl thread is
     * interrupted and this method waits (bounded by the crawler's process-exit wait) for it to
     * exit. When no runnable exists the crawl is treated as removed from the queue and the
     * completion bookkeeping is done here. The temporary directory is deleted in both cases.
     *
     * @return the crawler status object of this crawl
     */
    @Override
    public CrawlerStatus stopCrawl() {
        JobStatus status = this.getJobStatus();
        JobStatus.StatusCodeEnum statusCode = status.getStatusCode();
        if (statusCode != JobStatus.StatusCodeEnum.ACTIVE && statusCode != JobStatus.StatusCodeEnum.QUEUED) {
            return this.getCrawlerStatus();
        }
        status.setStatusCode(JobStatus.StatusCodeEnum.ABORTED);
        status.setMsg("Crawl Aborted.");
        // Work on snapshots: the crawl thread sets lockssRunnable to null when it finishes,
        // which can happen right after destroy() below and would otherwise cause an NPE here.
        LockssRunnable runner = this.lockssRunnable;
        if (runner != null) {
            Process proc = this.crawlProcess;
            if (proc != null) {
                proc.destroy();
            }
            runner.interruptThread();
            if (this.crawlerStatus != null) {
                // 5 is the status code this class uses for aborted/interrupted crawls.
                this.crawlerStatus.setCrawlStatus(5, "Crawl Aborted");
            }
            runner.waitExited(Deadline.in((long)this.crawler.getProcExitWait()));
            log.debug("Exited!!");
            this.lockssRunnable = null;
        } else if (this.crawlerStatus != null) {
            this.crawlerStatus.setCrawlStatus(5, "Request removed from queue.");
            this.crawler.getPluggableCrawlManager().handleCrawlComplete(this.crawlerStatus);
            this.getAuState().newCrawlFinished(this.crawlerStatus.getCrawlStatus(), null);
            this.crawlerStatus.signalCrawlEnded();
        }
        this.deleteTmpDir();
        return this.getCrawlerStatus();
    }

    /**
     * Returns the AU state, looking it up from the crawler status's AU on first use.
     *
     * @return the cached or freshly fetched AU state
     */
    AuState getAuState() {
        if (this.auState != null) {
            return this.auState;
        }
        ArchivalUnit unit = (ArchivalUnit)this.crawlerStatus.getAu();
        this.auState = AuUtil.getAuState(unit);
        return this.auState;
    }

    /**
     * Replaces the cached AU state.
     *
     * @param state the AU state to use from now on
     */
    void setAuState(AuState state) {
        auState = state;
    }

    /**
     * @return the temporary working directory, or {@code null} if none has been created yet
     */
    public File getTmpDir() {
        return tmpDir;
    }

    /**
     * Lists the files in the temporary directory whose names match any of the given wildcards.
     *
     * @param exts wildcard patterns handed to a {@link WildcardFileFilter}
     * @return the matching files found by {@code FileUtils.listFiles}
     */
    public Collection<File> getWarcFiles(List<String> exts) {
        IOFileFilter nameFilter = (IOFileFilter)new WildcardFileFilter(exts);
        // No directory filter is supplied (null).
        return FileUtils.listFiles((File)this.tmpDir, nameFilter, null);
    }

    /**
     * @return the command line of the external process, or {@code null} if not built yet
     */
    public List<String> getCommand() {
        return command;
    }

    /**
     * @return the crawl list taken from the crawl descriptor at construction time
     */
    protected List<String> getReqUrls() {
        return reqUrls;
    }

    /**
     * @return the live list of URL prefixes collected so far by {@code parseLine}
     */
    protected List<String> getStems() {
        return stems;
    }

    /**
     * Creates the runnable that executes this crawl and remembers it in {@code lockssRunnable}.
     *
     * <p>The runnable prepares the crawl via {@link #startCrawl()}, launches the external
     * process in the temporary directory, feeds its output to {@code StreamGobbler} threads,
     * waits for the process to exit and, on success, stores the resulting WARC files and
     * updates the AU configuration. Completion bookkeeping always runs in the {@code finally}
     * block. The numeric status codes (2 while storing, 3 on success, 4 on failure, 5 when
     * interrupted) are passed straight to {@code CrawlerStatus.setCrawlStatus}.
     *
     * @return the newly created runnable
     */
    public LockssRunnable getRunnable() {
        this.lockssRunnable = new LockssRunnable(this.threadName){

            public void lockssRun() {
                log.debug2("{} started", (Object)this);
                CmdLineCrawl.this.crawlerStatus = CmdLineCrawl.this.getCrawlerStatus();
                CmdLineCrawl.this.auState = AuUtil.getAuState((ArchivalUnit)CmdLineCrawl.this.crawlerStatus.getAu());
                boolean joinOutputStreams = CmdLineCrawl.this.crawler.isJoinOutputStreams();
                try {
                    CmdLineCrawl.this.auState.newCrawlStarted();
                    this.nowRunning();
                    CmdLineCrawl.this.crawlerStatus = CmdLineCrawl.this.startCrawl();
                    // startCrawl() swallows setup failures and leaves command unset; fail here
                    // with an IOException so the error path below records the failure instead
                    // of ProcessBuilder.command(null) throwing an uncaught NullPointerException.
                    if (CmdLineCrawl.this.command == null) {
                        throw new IOException("Crawl command could not be built");
                    }
                    ProcessBuilder builder = new ProcessBuilder(new String[0]);
                    builder.directory(CmdLineCrawl.this.tmpDir);
                    builder.command(CmdLineCrawl.this.command);
                    if (joinOutputStreams) {
                        builder.redirectErrorStream(true);
                    }
                    log.debug("Starting crawl process in {} with command {}...", (Object)CmdLineCrawl.this.tmpDir, (Object)String.join((CharSequence)" ", CmdLineCrawl.this.command));
                    Process process = builder.start();
                    CmdLineCrawl.this.crawlProcess = process;
                    StreamGobbler outputGobbler = new StreamGobbler(process.getInputStream(), "OUTPUT");
                    outputGobbler.start();
                    // stderr only needs its own reader when it was not merged into stdout.
                    StreamGobbler errorGobbler = null;
                    if (!joinOutputStreams) {
                        errorGobbler = new StreamGobbler(process.getErrorStream(), "ERROR");
                        errorGobbler.start();
                    }
                    CmdLineCrawl.this.crawlerStatus.signalCrawlStarted();
                    int exitCode = process.waitFor();
                    // Let the readers finish the buffered output before the stems they collect
                    // are used below; an interrupt (e.g. from stopCrawl) still breaks the wait.
                    outputGobbler.join();
                    if (errorGobbler != null) {
                        errorGobbler.join();
                    }
                    if (CmdLineCrawl.this.crawler.didCrawlSucceed(exitCode)) {
                        log.info("Crawl process succeeded with exitCode {}", (Object)exitCode);
                        Collection<File> warcFiles = CmdLineCrawl.this.getWarcFiles(CmdLineCrawl.this.crawler.getWarcFileFilter());
                        log.info("Importing {} into repository.", (Object)StringUtil.numberOfUnits((long)warcFiles.size(), (String)"warcfile"));
                        CmdLineCrawl.this.crawlerStatus.setCrawlStatus(2, "Storing");
                        for (File warc : warcFiles) {
                            boolean isCompressed = FileUtil.getExtension((String)warc.getName()).equalsIgnoreCase(CmdLineCrawl.this.crawler.getCompressedWarcExtension());
                            CmdLineCrawl.this.crawler.storeInRepository(CmdLineCrawl.this.crawlerStatus.getAuId(), warc, isCompressed);
                        }
                        CmdLineCrawl.this.crawler.updateAuConfig(CmdLineCrawl.this.getAu(), CmdLineCrawl.this.isRepairCrawl, CmdLineCrawl.this.getReqUrls(), CmdLineCrawl.this.getStems());
                        CmdLineCrawl.this.crawlerStatus.setCrawlStatus(3);
                        log.info("Content stored, crawl complete.");
                        CmdLineCrawl.this.deleteTmpDir();
                    } else {
                        log.info("Crawl process failed with exitCode {}", (Object)exitCode);
                        CmdLineCrawl.this.crawlerStatus.setCrawlStatus(4, "crawl exited with code: " + exitCode);
                        CmdLineCrawl.this.deleteTmpDir();
                    }
                }
                catch (IOException ioe) {
                    log.error("Exception caught running process", (Throwable)ioe);
                    CmdLineCrawl.this.crawlerStatus.setCrawlStatus(4, "Exception thrown: " + ioe.getMessage());
                }
                catch (InterruptedException ignore) {
                    // Do not leave the external process running once this thread gives up on it.
                    Process running = CmdLineCrawl.this.crawlProcess;
                    if (running != null) {
                        running.destroy();
                    }
                    // The interrupt flag is deliberately not restored here so that the
                    // bookkeeping calls in the finally block are not cut short by it.
                    if (CmdLineCrawl.this.crawlerStatus.getCrawlStatus() != 5) {
                        CmdLineCrawl.this.crawlerStatus.setCrawlStatus(5, "Crawl Interrupted");
                    }
                }
                finally {
                    log.debug2("finishing crawl status updates...");
                    CmdLineCrawl.this.auState.newCrawlFinished(CmdLineCrawl.this.crawlerStatus.getCrawlStatus(), null);
                    CmdLineCrawl.this.crawlerStatus.signalCrawlEnded();
                    ApiUtils.getPluggableCrawlManager().handleCrawlComplete(CmdLineCrawl.this.crawlerStatus);
                    this.setThreadName(CmdLineCrawl.this.threadName + ": idle");
                    log.debug2("{} terminating", (Object)this);
                    CmdLineCrawl.this.lockssRunnable = null;
                }
            }
        };
        return this.lockssRunnable;
    }

    /**
     * Deletes the temporary directory tree, if one was created, and logs a warning when the
     * deletion is reported as unsuccessful. A missing directory counts as success.
     */
    void deleteTmpDir() {
        log.debug("Deleting tree at {}", (Object)this.tmpDir);
        boolean removed = this.tmpDir == null || FileUtil.delTree((File)this.tmpDir);
        log.trace("isDeleted = {}", (Object)removed);
        if (removed) {
            return;
        }
        log.warn("Temporary directory {} cannot be deleted after processing", (Object)this.tmpDir);
    }

    /**
     * @return the logger shared by all instances of this class
     */
    L4JLogger getLog() {
        return CmdLineCrawl.log;
    }

    /**
     * Interprets one line of the external process's output and updates the crawler status.
     *
     * <p>Lines without a URL are ignored. A line matching {@code successPattern} records the
     * URL as fetched, adds its byte count and remembers the URL's prefix as a stem. A line
     * matching {@code errorPattern} records an error for the URL. Anything else is logged as
     * an unknown pattern.
     *
     * @param pre  an earlier line that ended with ':' and belongs with {@code line}, or
     *             {@code null}; its trailing ':' is removed and it is prepended to {@code line}
     * @param line the output line to interpret
     */
    public void parseLine(String pre, String line) {
        String msgLine = line;
        if (pre != null) {
            msgLine = StringUtil.removeTrailing(pre, ":") + " " + line;
        }
        List<String> urls = CmdLineCrawl.extractUrls(msgLine);
        if (urls.isEmpty()) {
            return;
        }
        if (urls.size() > 1) {
            log.warn("Found multiple urls in message line: " + msgLine);
        }
        // Only the first URL on the line is reported.
        String url = urls.get(0);
        String ext = null;
        try {
            ext = UrlUtil.getFileExtension(url);
        }
        catch (MalformedURLException e) {
            log.warn("Attempt to parse log line with malformed url.");
        }
        if (successPattern.matcher(msgLine).matches()) {
            try {
                String stem = UrlUtil.getUrlPrefix(url);
                // stdout and stderr are parsed on separate threads when the streams are not
                // joined, so guard the check-then-add on the shared list.
                synchronized (this.stems) {
                    if (!this.stems.contains(stem)) {
                        this.stems.add(stem);
                    }
                }
            }
            catch (MalformedURLException e) {
                log.error("Found malformed url: " + url);
            }
            long bytesFetched = CmdLineCrawl.extractBytes(msgLine);
            this.crawlerStatus.signalUrlFetched(url);
            this.crawlerStatus.addContentBytesFetched(bytesFetched);
            if (ext != null) {
                this.crawlerStatus.signalMimeTypeOfUrl(MimeUtil.getMimeTypeFromExtension(ext), url);
            }
        } else if (errorPattern.matcher(msgLine).matches()) {
            // errorPattern is case-insensitive and only needs word boundaries, so the exact
            // " ERROR " marker may be absent (e.g. "error:"); fall back to the whole line
            // rather than slicing at a bogus offset.
            int idx = msgLine.indexOf(ERROR_STR);
            String error = idx >= 0 ? msgLine.substring(idx + ERROR_STR.length()) : msgLine;
            this.crawlerStatus.signalErrorForUrl(url, error);
            if (ext != null) {
                this.crawlerStatus.signalMimeTypeOfUrl(MimeUtil.getMimeTypeFromExtension(ext), url);
            }
        } else {
            log.warn("Unknown pattern while parsing log line: " + line);
        }
    }

    /**
     * Collects every substring of {@code text} that matches {@code urlPattern}, in order of
     * appearance.
     *
     * @param text the text to scan
     * @return the URLs found; empty when there are none
     */
    public static List<String> extractUrls(String text) {
        List<String> found = new ArrayList<String>();
        for (Matcher hit = urlPattern.matcher(text); hit.find(); ) {
            found.add(hit.group());
        }
        return found;
    }

    /**
     * Extracts the number before the slash from the first {@code [digits/digits]} counter in
     * {@code str}, as located by {@code bytesPattern}.
     *
     * @param str the text to scan; may be {@code null}
     * @return the parsed number, or 0 when {@code str} is {@code null}, contains no counter,
     *         or the number does not fit in a {@code long}
     */
    public static long extractBytes(String str) {
        if (str == null) {
            return 0L;
        }
        Matcher m = bytesPattern.matcher(str);
        if (!m.find()) {
            return 0L;
        }
        String counter = m.group();
        // Skip the leading '[' and stop at the '/' separating the two numbers.
        String digits = counter.substring(1, counter.indexOf('/'));
        try {
            return Long.parseLong(digits);
        }
        catch (NumberFormatException tooLarge) {
            // The pattern guarantees digits only, so this can only be an overflow. Treat it as
            // "unknown" instead of letting an unchecked exception escape to the reader thread.
            log.warn("Byte count out of range in: " + counter);
            return 0L;
        }
    }

    /**
     * Builds a bracketed, comma-separated description of this crawl: AU id, priority, refetch
     * depth (only when it is not negative), crawl descriptor, temporary directory, command and
     * crawler status.
     *
     * @return the description
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("[");
        text.append("(I): ").append(this.getAuId());
        text.append(", pri: ").append(this.getCrawlDesc().getPriority());
        if (this.getCrawlDesc().getRefetchDepth() >= 0) {
            text.append(", depth: ").append(this.getCrawlDesc().getRefetchDepth());
        }
        text.append(", crawlDesc: ").append(this.getCrawlDesc());
        text.append(", tmpDir: ").append(this.tmpDir);
        text.append(", command: ").append(this.command);
        text.append(", crawlerStatus: ").append(this.getCrawlerStatus());
        return text.append("]").toString();
    }

    /**
     * Thread that drains one output stream of the external process line by line, hands each
     * complete message to {@link CmdLineCrawl#parseLine(String, String)} and re-logs the line
     * at the level configured for that stream.
     */
    private class StreamGobbler
    extends Thread {
        InputStream is;
        String type;

        /**
         * @param is   the process stream to drain
         * @param type {@code "OUTPUT"} or {@code "ERROR"}; lines of any other type are read
         *             and discarded
         */
        private StreamGobbler(InputStream is, String type) {
            this.is = is;
            this.type = type;
        }

        @Override
        public void run() {
            boolean isError = "ERROR".equals(this.type);
            boolean isOutput = "OUTPUT".equals(this.type);
            // Both stream kinds are handled identically apart from the log level.
            Level level = Level.toLevel((String)(isError ? CmdLineCrawl.this.errorLogLevel : CmdLineCrawl.this.outputLogLevel));
            try (BufferedReader br = new BufferedReader(new InputStreamReader(this.is))) {
                String line;
                String pre = null;
                while ((line = br.readLine()) != null) {
                    if (!isError && !isOutput) {
                        continue;
                    }
                    if (line.endsWith(":")) {
                        // A line ending in ':' is a prefix for the line that follows it.
                        pre = line;
                    } else {
                        try {
                            CmdLineCrawl.this.parseLine(pre, line);
                        }
                        catch (RuntimeException re) {
                            // Keep draining: if this thread died the process could block on a
                            // full pipe and never exit.
                            log.warn("Unable to parse crawler output line: " + line, (Throwable)re);
                        }
                        pre = null;
                    }
                    log.log(level, line);
                }
            }
            catch (IOException ioe) {
                log.error("Exception thrown while reading stream output.", (Throwable)ioe);
            }
        }
    }
}

