001/** 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 * 006 */ 007package org.fcrepo.migration; 008 009import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue; 010import static edu.wisc.library.ocfl.api.util.Enforce.notNull; 011import static org.slf4j.LoggerFactory.getLogger; 012import static picocli.CommandLine.Help.Visibility.ALWAYS; 013 014import java.io.File; 015import java.util.concurrent.Callable; 016 017import org.apache.jena.query.ARQ; 018import org.fcrepo.migration.foxml.AkubraFSIDResolver; 019import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource; 020import org.fcrepo.migration.foxml.InternalIDResolver; 021import org.fcrepo.migration.foxml.LegacyFSIDResolver; 022import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource; 023import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler; 024import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler; 025import org.fcrepo.migration.metrics.PrometheusActuator; 026import org.fcrepo.migration.pidlist.ResumePidListManager; 027import org.fcrepo.migration.pidlist.UserProvidedPidListManager; 028import org.fcrepo.storage.ocfl.OcflObjectSessionFactory; 029 030import org.apache.commons.io.FileUtils; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import ch.qos.logback.classic.Level; 035import ch.qos.logback.classic.LoggerContext; 036import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry; 037import edu.wisc.library.ocfl.api.model.DigestAlgorithm; 038import picocli.CommandLine; 039import picocli.CommandLine.Command; 040import picocli.CommandLine.Option; 041 042 043/** 044 * This class provides a simple CLI for running and configuring migration-utils 045 * - See README.md for usage details 046 * 047 * @author Remi Malessa 048 * @author awoods 049 * @since 2019-11-15 050 */ 051@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false, 052 version = "Migration Utils - 4.4.1.b") 053public class PicocliMigrator implements Callable<Integer> { 054 055 private static final Logger LOGGER = getLogger(PicocliMigrator.class); 056 057 private enum F3SourceTypes { 058 AKUBRA, LEGACY, EXPORTED; 059 060 static F3SourceTypes toType(final String v) { 061 return valueOf(v.toUpperCase()); 062 } 063 } 064 065 private final String DEFAULT_PREFIX = "info:fedora/"; 066 067 @Option(names = {"--source-type", "-t"}, required = true, order = 1, 068 description = "Fedora 3 source type. Choices: akubra | legacy | exported") 069 private F3SourceTypes f3SourceType; 070 071 @Option(names = {"--datastreams-dir", "-d"}, order = 2, 072 description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')") 073 private File f3DatastreamsDir; 074 075 @Option(names = {"--objects-dir", "-o"}, order = 3, 076 description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')") 077 private File f3ObjectsDir; 078 079 @Option(names = {"--exported-dir", "-e"}, order = 4, 080 description = "Directory containing Fedora 3 export (used with --source-type 'exported')") 081 private File f3ExportedDir; 082 083 @Option(names = {"--target-dir", "-a"}, required = true, order = 5, 084 description = "OCFL storage root directory (data/ocfl-root is created for migration-type FEDORA_OCFL)") 085 private File targetDir; 086 087 @Option(names = {"--working-dir", "-i"}, order = 6, 088 description = "Directory where supporting state will be written (cached index of datastreams, ...)") 089 private File workingDir; 090 091 @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18, 092 description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.") 093 private boolean deleteInactive; 094 095 @Option(names = {"--atomic-resources", "-A"}, defaultValue = "false", showDefaultValue = ALWAYS, 096 order = 19, 097 description = "Migrate objects and datastreams as atomic resources instead of archival groups") 098 private boolean atomicResources; 099 100 @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 20, 101 description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL") 102 private MigrationType migrationType; 103 104 @Option(names = {"--id-prefix"}, defaultValue = DEFAULT_PREFIX, showDefaultValue = ALWAYS, order = 21, 105 description = "Only use this for PLAIN_OCFL migrations: Prefix to add to PIDs for OCFL object IDs" 106 + " - defaults to info:fedora/, like Fedora3") 107 private String idPrefix; 108 109 @Option(names = {"--foxml-file"}, defaultValue = "false", order = 22, 110 description = "Migrate FOXML file as a whole file, instead of creating property files. FOXML file will" 111 + " be migrated, then marked as deleted so it doesn't show up as an active file.") 112 private boolean foxmlFile; 113 114 @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 23, 115 description = "Limit number of objects to be processed.\n Default: no limit") 116 private int objectLimit; 117 118 @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 24, 119 description = "Resume from last successfully migrated Fedora 3 object") 120 private boolean resume; 121 122 @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 25, 123 description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.") 124 private boolean continueOnError; 125 126 @Option(names = {"--pid-file", "-p"}, order = 26, 127 description = "PID file listing which Fedora 3 objects to migrate") 128 private File pidFile; 129 130 @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 27, 131 description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML") 132 private boolean addExtensions; 133 134 @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 28, 135 description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs") 136 private String f3hostname; 137 138 @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 29, 139 description = "The username to associate with all of the migrated resources.") 140 private String user; 141 142 @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS, 143 order = 30, description = "The username to associate with all of the migrated resources.") 144 private String userUri; 145 146 @Option(names = {"--algorithm"}, defaultValue = "sha512", showDefaultValue = ALWAYS, order = 31, 147 description = "The digest algorithm to use in the OCFL objects created. Either sha256 or sha512") 148 private String digestAlgorithm; 149 150 @Option(names = {"--no-checksum-validation"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 32, 151 description = "Disable validation that datastream content matches Fedora 3 checksum.") 152 private boolean disableChecksumValidation; 153 154 @Option(names = {"--enable-metrics"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 33, 155 description = "Enable gathering of metrics for a Prometheus instance. " + 156 "\nNote: this requires port 8080 to be free in order for Prometheus to scrape metrics.") 157 private boolean enableMetrics; 158 159 @Option(names = {"--disable-dc"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 36, 160 description = "Disable migrating DC datastream into RDF object properties. ") 161 private boolean disableDc; 162 163 164 @Option(names = {"--head-only", "-H"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 35, 165 description = "Migrate only the HEAD of each datastream") 166 private boolean headOnly; 167 168 @Option(names = {"--debug"}, order = 34, description = "Enables debug logging") 169 private boolean debug; 170 171 private File indexDir; 172 173 private File ocflStorageDir; 174 175 /** 176 * @param args Command line arguments 177 */ 178 public static void main(final String[] args) { 179 final PicocliMigrator migrator = new PicocliMigrator(); 180 final CommandLine cmd = new CommandLine(migrator); 181 cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType); 182 cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator)); 183 184 cmd.execute(args); 185 } 186 187 private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler { 188 189 private final PicocliMigrator migrator; 190 191 PicoliMigrationExceptionHandler(final PicocliMigrator migrator) { 192 this.migrator = migrator; 193 } 194 195 @Override 196 public int handleExecutionException( 197 final Exception ex, 198 final CommandLine commandLine, 199 final CommandLine.ParseResult parseResult) { 200 commandLine.getErr().println(ex.getMessage()); 201 if (migrator.debug) { 202 ex.printStackTrace(commandLine.getErr()); 203 } 204 commandLine.usage(commandLine.getErr()); 205 return commandLine.getCommandSpec().exitCodeOnExecutionException(); 206 } 207 } 208 209 private static void setDebugLogLevel() { 210 final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory(); 211 final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration"); 212 logger.setLevel(Level.toLevel("DEBUG")); 213 } 214 215 @Override 216 public Integer call() throws Exception { 217 218 // Set debug log level if requested 219 if (debug) { 220 setDebugLogLevel(); 221 } 222 223 if (migrationType == MigrationType.FEDORA_OCFL && !idPrefix.equals(DEFAULT_PREFIX)) { 224 throw new IllegalArgumentException("Can't change the ID Prefix for FEDORA_OCFL migrations"); 225 } 226 227 if (!digestAlgorithm.equals("sha512") && !digestAlgorithm.equalsIgnoreCase("sha256")) { 228 throw new IllegalArgumentException("Invalid algorithm specified, must be one of sha512 or sha256"); 229 } 230 231 if (headOnly && atomicResources) { 232 throw new IllegalArgumentException("Atomic migrations currently do not support the head only option"); 233 } 234 235 final DigestAlgorithm algorithm = DigestAlgorithmRegistry.getAlgorithm(digestAlgorithm); 236 notNull(algorithm, "Invalid algorithm specified, must be one of sha512 or sha256"); 237 238 // Pre-processing directory verification 239 notNull(targetDir, "targetDir must be provided!"); 240 if (!targetDir.exists()) { 241 targetDir.mkdirs(); 242 } 243 244 if (workingDir == null) { 245 LOGGER.info("No working-dir option passed in - using current directory."); 246 workingDir = new File(System.getProperty("user.dir")); 247 } 248 if (!workingDir.exists()) { 249 workingDir.mkdirs(); 250 } 251 indexDir = new File(workingDir, "index"); 252 253 if (migrationType == MigrationType.FEDORA_OCFL) { 254 // Fedora 6.0.0 expects a data/ocfl-root structure 255 ocflStorageDir = targetDir.toPath().resolve("data").resolve("ocfl-root").toFile(); 256 if (!ocflStorageDir.exists()) { 257 ocflStorageDir.mkdirs(); 258 } 259 } else { 260 ocflStorageDir = targetDir; 261 } 262 263 // Create Staging dir 264 final File ocflStagingDir = new File(workingDir, "staging"); 265 if (!ocflStagingDir.exists()) { 266 ocflStagingDir.mkdirs(); 267 } 268 269 // Create PID list dir 270 final File pidDir = new File(workingDir, "pid"); 271 if (!pidDir.exists()) { 272 pidDir.mkdirs(); 273 } 274 275 // Which F3 source are we using? - verify associated options 276 final ObjectSource objectSource; 277 InternalIDResolver idResolver = null; 278 switch (f3SourceType) { 279 case EXPORTED: 280 notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!"); 281 282 objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname); 283 break; 284 case AKUBRA: 285 notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!"); 286 notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!"); 287 expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " + 288 f3ObjectsDir.getAbsolutePath()); 289 290 idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir); 291 objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname); 292 break; 293 case LEGACY: 294 notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!"); 295 notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!"); 296 expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " + 297 f3ObjectsDir.getAbsolutePath()); 298 299 idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir); 300 objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname); 301 break; 302 default: 303 throw new RuntimeException("Should never happen"); 304 } 305 306 // setup HttpServer + micrometer for publishing metrics 307 final PrometheusActuator actuator = new PrometheusActuator(enableMetrics); 308 actuator.start(); 309 310 final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(), 311 ocflStagingDir.toPath(), migrationType, user, userUri, algorithm, disableChecksumValidation) 312 .getObject(); 313 314 final FedoraObjectVersionHandler archiveGroupHandler = 315 new ArchiveGroupHandler( 316 ocflSessionFactory, migrationType, 317 atomicResources ? ResourceMigrationType.ATOMIC : ResourceMigrationType.ARCHIVAL, 318 addExtensions, deleteInactive, foxmlFile, 319 user, idPrefix, headOnly, disableChecksumValidation, disableDc); 320 final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler( 321 archiveGroupHandler); 322 323 // PID-list-managers 324 // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll") 325 final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume); 326 327 // - PID-list manager 328 final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile); 329 330 final Migrator migrator = new Migrator(); 331 migrator.setLimit(objectLimit); 332 migrator.setSource(objectSource); 333 migrator.setHandler(objectHandler); 334 migrator.setResumePidListManager(resumeManager); 335 migrator.setUserProvidedPidListManager(pidListManager); 336 migrator.setContinueOnError(continueOnError); 337 338 // for some reason ARQ does not implicitly init on some systems 339 ARQ.init(); 340 341 try { 342 migrator.run(); 343 } finally { 344 ocflSessionFactory.close(); 345 if (idResolver != null) { 346 idResolver.close(); 347 } 348 FileUtils.deleteDirectory(ocflStagingDir); 349 actuator.stop(); 350 } 351 352 return 0; 353 } 354 355}