public class CmdLineCrawler extends Object implements PluggableCrawler
| Modifier and Type | Class and Description |
|---|---|
static interface |
CmdLineCrawler.CommandLineBuilder |
static class |
CmdLineCrawler.RunnableCrawlJob |
| Constructor and Description |
|---|
CmdLineCrawler()
Instantiates a new Cmd line crawler.
|
| Modifier and Type | Method and Description |
|---|---|
void |
deleteAllCrawls()
Stop all crawls and clear the crawl queue managed by this crawler
|
protected boolean |
didCrawlSucceed(int exitCode) |
void |
disable(boolean abortCrawling)
Disable this crawler, clearing any queued crawls.
|
protected CmdLineCrawler.CommandLineBuilder |
getCmdLineBuilder() |
String |
getCompressedWarcExtension() |
CrawlerConfig |
getConfig() |
PluggableCrawl |
getCrawl(String crawlId)
Get a Crawl for a given crawl id.
|
CrawlerConfig |
getCrawlerConfig()
Return the configuration for this crawler
|
String |
getCrawlerId()
Return the unique Id for this crawler.
|
String |
getErrorLogLevel() |
String |
getOutputLogLevel() |
PluggableCrawlManager |
getPluggableCrawlManager() |
long |
getProcExitWait() |
String |
getUncompressedWarcExtension() |
List<String> |
getUnsupportedParams() |
List<String> |
getWarcFileFilter() |
protected void |
initCrawlScheduler(String reqSpec) |
boolean |
isCrawlerEnabled()
Returns whether this crawler is enabled.
|
boolean |
isElgibleForCrawl(String auId) |
boolean |
isJoinOutputStreams() |
PluggableCrawl |
requestCrawl(org.lockss.plugin.ArchivalUnit au,
org.lockss.util.rest.crawler.CrawlJob crawlJob) |
CmdLineCrawler |
setCmdLineBuilder(CmdLineCrawler.CommandLineBuilder cmdLineBuilder) |
CmdLineCrawler |
setConfig(CrawlerConfig config) |
CmdLineCrawler |
setCrawlManager(PluggableCrawlManager pcManager) |
CmdLineCrawler |
setNamespace(String namespace) |
void |
setPluggableCrawlManager(PluggableCrawlManager pluggableCrawlManager)
Set the Crawl Manager which created and maintains this crawler.
|
CmdLineCrawler |
setV2Repo(org.lockss.util.rest.repo.LockssRepository v2Repo) |
void |
shutdown()
Shutdown the crawler.
|
protected void |
shutdownWithWait(ExecutorService scheduler) |
PluggableCrawl |
stopCrawl(String crawlId)
Stop a specific crawl.
|
void |
storeInRepository(String auId,
File warcFile,
boolean isCompressed) |
void |
updateAuConfig(org.lockss.plugin.ArchivalUnit au,
boolean isRepairCrawl,
List<String> reqUrls,
List<String> crawlStems) |
void |
updateCrawlerConfig(CrawlerConfig crawlerConfig)
Set the configuration parameters for this crawler.
|
boolean |
useCompressWarc() |
public static final String PREFIX
public static final String ATTR_CRAWL_EXECUTOR_SPEC
public static final String DEFAULT_CMDLINE_CRAWL_EXECUTOR_SPEC
public static final String ATTR_EXCLUDE_STATUS_PATTERN
public static final String DEFAULT_EXCLUDE_STATUS_PATTERN
public static final String ATTR_OUTPUT_LOG_LEVEL
public static final String DEFAULT_OUTPUT_LOG_LEVEL
public static final String ATTR_ERROR_LOG_LEVEL
public static final String DEFAULT_ERROR_LOG_LEVEL
public static final String ATTR_JOIN_OUTPUT_STREAMS
public static final String DEFAULT_JOIN_OUTPUT_STREAMS
public static final String ATTR_PROC_EXIT_WAIT
public static final long DEFAULT_PROC_EXIT_WAIT
public static final String ATTR_COMPRESS_WARC
public static final String DEFAULT_COMPRESS_WARC
public static final String ATTR_COMPRESSED_WARC_FILE_EXTENSION
public static final String DEFAULT_COMPRESSED_WARC_FILE_EXTENSION
public static final String ATTR_UNCOMPRESSED_WARC_FILE_EXTENSION
public static final String DEFAULT_UNCOMPRESSED_WARC_FILE_EXTENSION
public static final String ATTR_UNSUPPORTED_PARAMS
public static final String START_URL_KEY
public static final String URL_STEMS_KEY
protected CrawlerConfig config
protected String outputLogLevel
protected String errorLogLevel
protected String excludeStatusPattern
protected boolean compressWarc
protected long procExitWait
protected HashMap<String,CmdLineCrawl> crawlMap
protected CmdLineCrawler.CommandLineBuilder cmdLineBuilder
protected PluggableCrawlManager pcManager
public CmdLineCrawler setCrawlManager(PluggableCrawlManager pcManager)
public CmdLineCrawler setV2Repo(org.lockss.util.rest.repo.LockssRepository v2Repo)
public CmdLineCrawler setNamespace(String namespace)
public CmdLineCrawler setConfig(CrawlerConfig config)
public CmdLineCrawler setCmdLineBuilder(CmdLineCrawler.CommandLineBuilder cmdLineBuilder)
public CrawlerConfig getConfig()
protected CmdLineCrawler.CommandLineBuilder getCmdLineBuilder()
public String getCrawlerId()
Description copied from interface: PluggableCrawler
Specified by: getCrawlerId in interface PluggableCrawler
public void updateCrawlerConfig(CrawlerConfig crawlerConfig)
Description copied from interface: PluggableCrawler
Specified by: updateCrawlerConfig in interface PluggableCrawler
Parameters: crawlerConfig - the configuration parameters to use
public CrawlerConfig getCrawlerConfig()
Description copied from interface: PluggableCrawler
Specified by: getCrawlerConfig in interface PluggableCrawler
public long getProcExitWait()
public String getCompressedWarcExtension()
public String getUncompressedWarcExtension()
public boolean useCompressWarc()
public PluggableCrawl requestCrawl(org.lockss.plugin.ArchivalUnit au, org.lockss.util.rest.crawler.CrawlJob crawlJob)
Specified by: requestCrawl in interface PluggableCrawler
public boolean isElgibleForCrawl(String auId)
public PluggableCrawl stopCrawl(String crawlId)
Description copied from interface: PluggableCrawler
Specified by: stopCrawl in interface PluggableCrawler
Parameters: crawlId - The crawl id of the crawl to stop
public PluggableCrawl getCrawl(String crawlId)
Description copied from interface: PluggableCrawler
Specified by: getCrawl in interface PluggableCrawler
Parameters: crawlId - The crawl id of the crawl to get
public void deleteAllCrawls()
Description copied from interface: PluggableCrawler
Specified by: deleteAllCrawls in interface PluggableCrawler
public boolean isCrawlerEnabled()
Description copied from interface: PluggableCrawler
Specified by: isCrawlerEnabled in interface PluggableCrawler
public void shutdown()
Description copied from interface: PluggableCrawler
Specified by: shutdown in interface PluggableCrawler
protected void shutdownWithWait(ExecutorService scheduler)
public void disable(boolean abortCrawling)
Description copied from interface: PluggableCrawler
Specified by: disable in interface PluggableCrawler
Parameters: abortCrawling - abort the currently running crawls.
public void setPluggableCrawlManager(PluggableCrawlManager pluggableCrawlManager)
Description copied from interface: PluggableCrawler
Specified by: setPluggableCrawlManager in interface PluggableCrawler
public PluggableCrawlManager getPluggableCrawlManager()
public void storeInRepository(String auId, File warcFile, boolean isCompressed) throws IOException
Throws: IOException
public void updateAuConfig(org.lockss.plugin.ArchivalUnit au,
boolean isRepairCrawl,
List<String> reqUrls,
List<String> crawlStems)
throws IOException
Throws: IOException
protected void initCrawlScheduler(String reqSpec)
protected boolean didCrawlSucceed(int exitCode)
public String getOutputLogLevel()
public String getErrorLogLevel()
public boolean isJoinOutputStreams()
Copyright © 2000–2023 LOCKSS Program. All rights reserved.