001/* 002 * Copyright 2015 DuraSpace, Inc. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.fcrepo.migration; 017 018import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue; 019import static edu.wisc.library.ocfl.api.util.Enforce.notNull; 020import static org.slf4j.LoggerFactory.getLogger; 021import static picocli.CommandLine.Help.Visibility.ALWAYS; 022 023import java.io.File; 024import java.util.concurrent.Callable; 025 026import org.fcrepo.migration.foxml.AkubraFSIDResolver; 027import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource; 028import org.fcrepo.migration.foxml.InternalIDResolver; 029import org.fcrepo.migration.foxml.LegacyFSIDResolver; 030import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource; 031import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler; 032import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler; 033import org.fcrepo.migration.metrics.PrometheusActuator; 034import org.fcrepo.migration.pidlist.ResumePidListManager; 035import org.fcrepo.migration.pidlist.UserProvidedPidListManager; 036import org.fcrepo.storage.ocfl.OcflObjectSessionFactory; 037 038import org.apache.commons.io.FileUtils; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042import ch.qos.logback.classic.Level; 043import ch.qos.logback.classic.LoggerContext; 044import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry; 045import edu.wisc.library.ocfl.api.model.DigestAlgorithm; 046import picocli.CommandLine; 047import picocli.CommandLine.Command; 048import picocli.CommandLine.Option; 049 050 051/** 052 * This class provides a simple CLI for running and configuring migration-utils 053 * - See README.md for usage details 054 * 055 * @author Remi Malessa 056 * @author awoods 057 * @since 2019-11-15 058 */ 059@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false, 060 version = "Migration Utils - 4.4.1.b") 061public class PicocliMigrator implements Callable<Integer> { 062 063 private static final Logger LOGGER = getLogger(PicocliMigrator.class); 064 065 private enum F3SourceTypes { 066 AKUBRA, LEGACY, EXPORTED; 067 068 static F3SourceTypes toType(final String v) { 069 return valueOf(v.toUpperCase()); 070 } 071 } 072 073 private final String DEFAULT_PREFIX = "info:fedora/"; 074 075 @Option(names = {"--source-type", "-t"}, required = true, order = 1, 076 description = "Fedora 3 source type. Choices: akubra | legacy | exported") 077 private F3SourceTypes f3SourceType; 078 079 @Option(names = {"--datastreams-dir", "-d"}, order = 2, 080 description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')") 081 private File f3DatastreamsDir; 082 083 @Option(names = {"--objects-dir", "-o"}, order = 3, 084 description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')") 085 private File f3ObjectsDir; 086 087 @Option(names = {"--exported-dir", "-e"}, order = 4, 088 description = "Directory containing Fedora 3 export (used with --source-type 'exported')") 089 private File f3ExportedDir; 090 091 @Option(names = {"--target-dir", "-a"}, required = true, order = 5, 092 description = "OCFL storage root directory (data/ocfl-root is created for migration-type FEDORA_OCFL)") 093 private File targetDir; 094 095 @Option(names = {"--working-dir", "-i"}, order = 6, 096 description = "Directory where supporting state will be written (cached index of datastreams, ...)") 097 private File workingDir; 098 099 @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18, 100 description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.") 101 private boolean deleteInactive; 102 103 @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 19, 104 description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL") 105 private MigrationType migrationType; 106 107 @Option(names = {"--id-prefix"}, defaultValue = DEFAULT_PREFIX, showDefaultValue = ALWAYS, order = 20, 108 description = "Only use this for PLAIN_OCFL migrations: Prefix to add to PIDs for OCFL object IDs" 109 + " - defaults to info:fedora/, like Fedora3") 110 private String idPrefix; 111 112 @Option(names = {"--foxml-file"}, defaultValue = "false", order = 21, 113 description = "Migrate FOXML file as a whole file, instead of creating property files. FOXML file will" 114 + " be migrated, then marked as deleted so it doesn't show up as an active file.") 115 private boolean foxmlFile; 116 117 @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 22, 118 description = "Limit number of objects to be processed.\n Default: no limit") 119 private int objectLimit; 120 121 @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 23, 122 description = "Resume from last successfully migrated Fedora 3 object") 123 private boolean resume; 124 125 @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 24, 126 description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.") 127 private boolean continueOnError; 128 129 @Option(names = {"--pid-file", "-p"}, order = 25, 130 description = "PID file listing which Fedora 3 objects to migrate") 131 private File pidFile; 132 133 @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 26, 134 description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML") 135 private boolean addExtensions; 136 137 @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 27, 138 description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs") 139 private String f3hostname; 140 141 @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 28, 142 description = "The username to associate with all of the migrated resources.") 143 private String user; 144 145 @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS, 146 order = 29, description = "The username to associate with all of the migrated resources.") 147 private String userUri; 148 149 @Option(names = {"--algorithm"}, defaultValue = "sha512", showDefaultValue = ALWAYS, order = 30, 150 description = "The digest algorithm to use in the OCFL objects created. Either sha256 or sha512") 151 private String digestAlgorithm; 152 153 @Option(names = {"--no-checksum-validation"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 31, 154 description = "Disable validation that datastream content matches Fedora 3 checksum.") 155 private boolean disableChecksumValidation; 156 157 @Option(names = {"--enable-metrics"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 32, 158 description = "Enable gathering of metrics for a Prometheus instance. " + 159 "\nNote: this requires port 8080 to be free in order for Prometheus to scrape metrics.") 160 private boolean enableMetrics; 161 162 @Option(names = {"--debug"}, order = 32, description = "Enables debug logging") 163 private boolean debug; 164 165 private File indexDir; 166 167 private File ocflStorageDir; 168 169 /** 170 * @param args Command line arguments 171 */ 172 public static void main(final String[] args) { 173 final PicocliMigrator migrator = new PicocliMigrator(); 174 final CommandLine cmd = new CommandLine(migrator); 175 cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType); 176 cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator)); 177 178 cmd.execute(args); 179 } 180 181 private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler { 182 183 private final PicocliMigrator migrator; 184 185 PicoliMigrationExceptionHandler(final PicocliMigrator migrator) { 186 this.migrator = migrator; 187 } 188 189 @Override 190 public int handleExecutionException( 191 final Exception ex, 192 final CommandLine commandLine, 193 final CommandLine.ParseResult parseResult) { 194 commandLine.getErr().println(ex.getMessage()); 195 if (migrator.debug) { 196 ex.printStackTrace(commandLine.getErr()); 197 } 198 commandLine.usage(commandLine.getErr()); 199 return commandLine.getCommandSpec().exitCodeOnExecutionException(); 200 } 201 } 202 203 private static void setDebugLogLevel() { 204 final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory(); 205 final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration"); 206 logger.setLevel(Level.toLevel("DEBUG")); 207 } 208 209 @Override 210 public Integer call() throws Exception { 211 212 // Set debug log level if requested 213 if (debug) { 214 setDebugLogLevel(); 215 } 216 217 if (migrationType == MigrationType.FEDORA_OCFL && !idPrefix.equals(DEFAULT_PREFIX)) { 218 throw new IllegalArgumentException("Can't change the ID Prefix for FEDORA_OCFL migrations"); 219 } 220 221 if (!digestAlgorithm.equals("sha512") && !digestAlgorithm.equalsIgnoreCase("sha256")) { 222 throw new IllegalArgumentException("Invalid algorithm specified, must be one of sha512 or sha256"); 223 } 224 final DigestAlgorithm algorithm = DigestAlgorithmRegistry.getAlgorithm(digestAlgorithm); 225 notNull(algorithm, "Invalid algorithm specified, must be one of sha512 or sha256"); 226 227 // Pre-processing directory verification 228 notNull(targetDir, "targetDir must be provided!"); 229 if (!targetDir.exists()) { 230 targetDir.mkdirs(); 231 } 232 233 if (workingDir == null) { 234 LOGGER.info("No working-dir option passed in - using current directory."); 235 workingDir = new File(System.getProperty("user.dir")); 236 } 237 if (!workingDir.exists()) { 238 workingDir.mkdirs(); 239 } 240 indexDir = new File(workingDir, "index"); 241 242 if (migrationType == MigrationType.FEDORA_OCFL) { 243 // Fedora 6.0.0 expects a data/ocfl-root structure 244 ocflStorageDir = targetDir.toPath().resolve("data").resolve("ocfl-root").toFile(); 245 if (!ocflStorageDir.exists()) { 246 ocflStorageDir.mkdirs(); 247 } 248 } else { 249 ocflStorageDir = targetDir; 250 } 251 252 // Create Staging dir 253 final File ocflStagingDir = new File(workingDir, "staging"); 254 if (!ocflStagingDir.exists()) { 255 ocflStagingDir.mkdirs(); 256 } 257 258 // Create PID list dir 259 final File pidDir = new File(workingDir, "pid"); 260 if (!pidDir.exists()) { 261 pidDir.mkdirs(); 262 } 263 264 // Which F3 source are we using? - verify associated options 265 final ObjectSource objectSource; 266 InternalIDResolver idResolver = null; 267 switch (f3SourceType) { 268 case EXPORTED: 269 notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!"); 270 271 objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname); 272 break; 273 case AKUBRA: 274 notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!"); 275 notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!"); 276 expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " + 277 f3ObjectsDir.getAbsolutePath()); 278 279 idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir); 280 objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname); 281 break; 282 case LEGACY: 283 notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!"); 284 notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!"); 285 expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " + 286 f3ObjectsDir.getAbsolutePath()); 287 288 idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir); 289 objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname); 290 break; 291 default: 292 throw new RuntimeException("Should never happen"); 293 } 294 295 // setup HttpServer + micrometer for publishing metrics 296 final PrometheusActuator actuator = new PrometheusActuator(enableMetrics); 297 actuator.start(); 298 299 final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(), 300 ocflStagingDir.toPath(), migrationType, user, userUri, algorithm, disableChecksumValidation) 301 .getObject(); 302 303 final FedoraObjectVersionHandler archiveGroupHandler = 304 new ArchiveGroupHandler(ocflSessionFactory, migrationType, addExtensions, deleteInactive, foxmlFile, 305 user, idPrefix, disableChecksumValidation); 306 final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler( 307 archiveGroupHandler); 308 309 // PID-list-managers 310 // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll") 311 final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume); 312 313 // - PID-list manager 314 final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile); 315 316 final Migrator migrator = new Migrator(); 317 migrator.setLimit(objectLimit); 318 migrator.setSource(objectSource); 319 migrator.setHandler(objectHandler); 320 migrator.setResumePidListManager(resumeManager); 321 migrator.setUserProvidedPidListManager(pidListManager); 322 migrator.setContinueOnError(continueOnError); 323 324 try { 325 migrator.run(); 326 } finally { 327 ocflSessionFactory.close(); 328 if (idResolver != null) { 329 idResolver.close(); 330 } 331 FileUtils.deleteDirectory(ocflStagingDir); 332 actuator.stop(); 333 } 334 335 return 0; 336 } 337 338}