001/* 002 * Copyright 2015 DuraSpace, Inc. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.fcrepo.migration; 017 018import ch.qos.logback.classic.Level; 019import ch.qos.logback.classic.LoggerContext; 020import org.apache.commons.io.FileUtils; 021import org.fcrepo.migration.foxml.AkubraFSIDResolver; 022import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource; 023import org.fcrepo.migration.foxml.InternalIDResolver; 024import org.fcrepo.migration.foxml.LegacyFSIDResolver; 025import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource; 026import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler; 027import org.fcrepo.migration.handlers.VersionAbstractionFedoraObjectHandler; 028import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler; 029import org.fcrepo.migration.pidlist.PidListManager; 030import org.fcrepo.migration.pidlist.ResumePidListManager; 031import org.fcrepo.migration.pidlist.UserProvidedPidListManager; 032import org.fcrepo.storage.ocfl.OcflObjectSessionFactory; 033import org.slf4j.Logger; 034import org.slf4j.LoggerFactory; 035import picocli.CommandLine; 036import picocli.CommandLine.Command; 037import picocli.CommandLine.Option; 038 039import java.io.File; 040import java.util.Arrays; 041import java.util.List; 042import java.util.concurrent.Callable; 043 044import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue; 045import static edu.wisc.library.ocfl.api.util.Enforce.notNull; 046import static org.slf4j.LoggerFactory.getLogger; 047import static picocli.CommandLine.Help.Visibility.ALWAYS; 048 049 050/** 051 * This class provides a simple CLI for running and configuring migration-utils 052 * - See README.md for usage details 053 * 054 * @author Remi Malessa 055 * @author awoods 056 * @since 2019-11-15 057 */ 058@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false, 059 version = "Migration Utils - 4.4.1.b") 060public class PicocliMigrator implements Callable<Integer> { 061 062 private static final Logger LOGGER = getLogger(PicocliMigrator.class); 063 064 private enum F3SourceTypes { 065 AKUBRA, LEGACY, EXPORTED; 066 067 static F3SourceTypes toType(final String v) { 068 return valueOf(v.toUpperCase()); 069 } 070 } 071 072 @Option(names = {"--source-type", "-t"}, required = true, order = 1, 073 description = "Fedora 3 source type. Choices: akubra | legacy | exported") 074 private F3SourceTypes f3SourceType; 075 076 @Option(names = {"--datastreams-dir", "-d"}, order = 2, 077 description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')") 078 private File f3DatastreamsDir; 079 080 @Option(names = {"--objects-dir", "-o"}, order = 3, 081 description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')") 082 private File f3ObjectsDir; 083 084 @Option(names = {"--exported-dir", "-e"}, order = 4, 085 description = "Directory containing Fedora 3 export (used with --source-type 'exported')") 086 private File f3ExportedDir; 087 088 @Option(names = {"--target-dir", "-a"}, required = true, order = 5, 089 description = "Directory where OCFL storage root and supporting state will be written") 090 private File targetDir; 091 092 @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18, 093 description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.") 094 private boolean deleteInactive; 095 096 @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 19, 097 description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL") 098 private MigrationType migrationType; 099 100 @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 21, 101 description = "Limit number of objects to be processed.\n Default: no limit") 102 private int objectLimit; 103 104 @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 22, 105 description = "Resume from last successfully migrated Fedora 3 object") 106 private boolean resume; 107 108 @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 23, 109 description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.") 110 private boolean continueOnError; 111 112 @Option(names = {"--pid-file", "-p"}, order = 24, 113 description = "PID file listing which Fedora 3 objects to migrate") 114 private File pidFile; 115 116 @Option(names = {"--index-dir", "-i"}, order = 25, 117 description = "Directory where cached index of datastreams (will reuse index if already exists)") 118 private File indexDir; 119 120 @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 26, 121 description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML") 122 private boolean addExtensions; 123 124 @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 27, 125 description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs") 126 private String f3hostname; 127 128 @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 28, 129 description = "The username to associate with all of the migrated resources.") 130 private String user; 131 132 @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS, 133 order = 29, description = "The username to associate with all of the migrated resources.") 134 private String userUri; 135 136 @Option(names = {"--debug"}, order = 30, description = "Enables debug logging") 137 private boolean debug; 138 139 140 /** 141 * @param args Command line arguments 142 */ 143 public static void main(final String[] args) { 144 final PicocliMigrator migrator = new PicocliMigrator(); 145 final CommandLine cmd = new CommandLine(migrator); 146 cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType); 147 cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator)); 148 149 cmd.execute(args); 150 } 151 152 private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler { 153 154 private final PicocliMigrator migrator; 155 156 PicoliMigrationExceptionHandler(final PicocliMigrator migrator) { 157 this.migrator = migrator; 158 } 159 160 @Override 161 public int handleExecutionException( 162 final Exception ex, 163 final CommandLine commandLine, 164 final CommandLine.ParseResult parseResult) { 165 commandLine.getErr().println(ex.getMessage()); 166 if (migrator.debug) { 167 ex.printStackTrace(commandLine.getErr()); 168 } 169 commandLine.usage(commandLine.getErr()); 170 return commandLine.getCommandSpec().exitCodeOnExecutionException(); 171 } 172 } 173 174 private static void setDebugLogLevel() { 175 final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory(); 176 final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration"); 177 logger.setLevel(Level.toLevel("DEBUG")); 178 } 179 180 @Override 181 public Integer call() throws Exception { 182 183 // Set debug log level if requested 184 if (debug) { 185 setDebugLogLevel(); 186 } 187 188 // Pre-processing directory verification 189 notNull(targetDir, "targetDir must be provided!"); 190 if (!targetDir.exists()) { 191 targetDir.mkdirs(); 192 } 193 194 // Fedora 6.0.0 expects a data directory at the top. 195 final File dataDir = targetDir.toPath().resolve("data").toFile(); 196 if (!dataDir.exists()) { 197 dataDir.mkdirs(); 198 } 199 200 // Create OCFL Storage dir 201 final File ocflStorageDir = new File(dataDir, "ocfl-root"); 202 if (!ocflStorageDir.exists()) { 203 ocflStorageDir.mkdirs(); 204 } 205 206 // Create Staging dir 207 final File ocflStagingDir = new File(dataDir, "staging"); 208 if (!ocflStagingDir.exists()) { 209 ocflStagingDir.mkdirs(); 210 } 211 212 // Create PID list dir 213 final File pidDir = new File(targetDir, "pid"); 214 if (!pidDir.exists()) { 215 pidDir.mkdirs(); 216 } 217 218 // Which F3 source are we using? - verify associated options 219 final ObjectSource objectSource; 220 final InternalIDResolver idResolver; 221 switch (f3SourceType) { 222 case EXPORTED: 223 notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!"); 224 225 objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname); 226 break; 227 case AKUBRA: 228 notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!"); 229 notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!"); 230 expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " + 231 f3ObjectsDir.getAbsolutePath()); 232 233 idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir); 234 objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname); 235 break; 236 case LEGACY: 237 notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!"); 238 notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!"); 239 expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " + 240 f3ObjectsDir.getAbsolutePath()); 241 242 idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir); 243 objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname); 244 break; 245 default: 246 throw new RuntimeException("Should never happen"); 247 } 248 249 final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(), 250 ocflStagingDir.toPath(), migrationType, user, userUri).getObject(); 251 252 final FedoraObjectVersionHandler archiveGroupHandler = 253 new ArchiveGroupHandler(ocflSessionFactory, migrationType, addExtensions, deleteInactive, user); 254 final FedoraObjectHandler versionHandler = new VersionAbstractionFedoraObjectHandler(archiveGroupHandler); 255 final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler( 256 versionHandler); 257 258 // PID-list-managers 259 // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll") 260 final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume); 261 262 // - PID-list manager 263 final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile); 264 265 final List<PidListManager> pidListManagerList = Arrays.asList(pidListManager, resumeManager); 266 267 final Migrator migrator = new Migrator(); 268 migrator.setLimit(objectLimit); 269 migrator.setSource(objectSource); 270 migrator.setHandler(objectHandler); 271 migrator.setPidListManagers(pidListManagerList); 272 migrator.setContinueOnError(continueOnError); 273 274 try { 275 migrator.run(); 276 } finally { 277 ocflSessionFactory.close(); 278 FileUtils.deleteDirectory(ocflStagingDir); 279 } 280 281 return 0; 282 } 283 284}