001/**
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 *
006 */
007package org.fcrepo.migration;
008
009import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue;
010import static edu.wisc.library.ocfl.api.util.Enforce.notNull;
011import static org.slf4j.LoggerFactory.getLogger;
012import static picocli.CommandLine.Help.Visibility.ALWAYS;
013
014import java.io.File;
015import java.util.concurrent.Callable;
016
017import org.apache.jena.query.ARQ;
018import org.fcrepo.migration.foxml.AkubraFSIDResolver;
019import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource;
020import org.fcrepo.migration.foxml.InternalIDResolver;
021import org.fcrepo.migration.foxml.LegacyFSIDResolver;
022import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource;
023import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler;
024import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler;
025import org.fcrepo.migration.metrics.PrometheusActuator;
026import org.fcrepo.migration.pidlist.ResumePidListManager;
027import org.fcrepo.migration.pidlist.UserProvidedPidListManager;
028import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
029
030import org.apache.commons.io.FileUtils;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import ch.qos.logback.classic.Level;
035import ch.qos.logback.classic.LoggerContext;
036import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
037import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
038import picocli.CommandLine;
039import picocli.CommandLine.Command;
040import picocli.CommandLine.Option;
041
042
043/**
044 * This class provides a simple CLI for running and configuring migration-utils
045 * - See README.md for usage details
046 *
047 * @author Remi Malessa
048 * @author awoods
049 * @since 2019-11-15
050 */
051@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false,
052        version = "Migration Utils - 4.4.1.b")
053public class PicocliMigrator implements Callable<Integer> {
054
055    private static final Logger LOGGER = getLogger(PicocliMigrator.class);
056
057    private enum F3SourceTypes {
058        AKUBRA, LEGACY, EXPORTED;
059
060        static F3SourceTypes toType(final String v) {
061            return valueOf(v.toUpperCase());
062        }
063    }
064
065    private final String DEFAULT_PREFIX = "info:fedora/";
066
067    @Option(names = {"--source-type", "-t"}, required = true, order = 1,
068            description = "Fedora 3 source type. Choices: akubra | legacy | exported")
069    private F3SourceTypes f3SourceType;
070
071    @Option(names = {"--datastreams-dir", "-d"}, order = 2,
072            description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')")
073    private File f3DatastreamsDir;
074
075    @Option(names = {"--objects-dir", "-o"}, order = 3,
076            description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')")
077    private File f3ObjectsDir;
078
079    @Option(names = {"--exported-dir", "-e"}, order = 4,
080            description = "Directory containing Fedora 3 export (used with --source-type 'exported')")
081    private File f3ExportedDir;
082
083    @Option(names = {"--target-dir", "-a"}, required = true, order = 5,
084            description = "OCFL storage root directory (data/ocfl-root is created for migration-type FEDORA_OCFL)")
085    private File targetDir;
086
087    @Option(names = {"--working-dir", "-i"}, order = 6,
088            description = "Directory where supporting state will be written (cached index of datastreams, ...)")
089    private File workingDir;
090
091    @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18,
092            description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.")
093    private boolean deleteInactive;
094
095    @Option(names = {"--atomic-resources", "-A"}, defaultValue = "false", showDefaultValue = ALWAYS,
096            order = 19,
097            description = "Migrate objects and datastreams as atomic resources instead of archival groups")
098    private boolean atomicResources;
099
100    @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 20,
101            description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL")
102    private MigrationType migrationType;
103
104    @Option(names = {"--id-prefix"}, defaultValue = DEFAULT_PREFIX, showDefaultValue = ALWAYS, order = 21,
105            description = "Only use this for PLAIN_OCFL migrations: Prefix to add to PIDs for OCFL object IDs"
106                + " - defaults to info:fedora/, like Fedora3")
107    private String idPrefix;
108
109    @Option(names = {"--foxml-file"}, defaultValue = "false", order = 22,
110            description = "Migrate FOXML file as a whole file, instead of creating property files. FOXML file will"
111                + " be migrated, then marked as deleted so it doesn't show up as an active file.")
112    private boolean foxmlFile;
113
114    @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 23,
115            description = "Limit number of objects to be processed.\n  Default: no limit")
116    private int objectLimit;
117
118    @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 24,
119            description = "Resume from last successfully migrated Fedora 3 object")
120    private boolean resume;
121
122    @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 25,
123            description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.")
124    private boolean continueOnError;
125
126    @Option(names = {"--pid-file", "-p"}, order = 26,
127            description = "PID file listing which Fedora 3 objects to migrate")
128    private File pidFile;
129
130    @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 27,
131            description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML")
132    private boolean addExtensions;
133
134    @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 28,
135            description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs")
136    private String f3hostname;
137
138    @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 29,
139            description = "The username to associate with all of the migrated resources.")
140    private String user;
141
142    @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS,
143            order = 30, description = "The username to associate with all of the migrated resources.")
144    private String userUri;
145
146    @Option(names = {"--algorithm"}, defaultValue = "sha512", showDefaultValue = ALWAYS, order = 31,
147            description = "The digest algorithm to use in the OCFL objects created. Either sha256 or sha512")
148    private String digestAlgorithm;
149
150    @Option(names = {"--no-checksum-validation"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 32,
151            description = "Disable validation that datastream content matches Fedora 3 checksum.")
152    private boolean disableChecksumValidation;
153
154    @Option(names = {"--enable-metrics"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 33,
155            description = "Enable gathering of metrics for a Prometheus instance. " +
156                          "\nNote: this requires port 8080 to be free in order for Prometheus to scrape metrics.")
157    private boolean enableMetrics;
158
159    @Option(names = {"--debug"}, order = 34, description = "Enables debug logging")
160    private boolean debug;
161
162    private File indexDir;
163
164    private File ocflStorageDir;
165
166    /**
167     * @param args Command line arguments
168     */
169    public static void main(final String[] args) {
170        final PicocliMigrator migrator = new PicocliMigrator();
171        final CommandLine cmd = new CommandLine(migrator);
172        cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType);
173        cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator));
174
175        cmd.execute(args);
176    }
177
178    private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler {
179
180        private final PicocliMigrator migrator;
181
182        PicoliMigrationExceptionHandler(final PicocliMigrator migrator) {
183            this.migrator = migrator;
184        }
185
186        @Override
187        public int handleExecutionException(
188                final Exception ex,
189                final CommandLine commandLine,
190                final CommandLine.ParseResult parseResult) {
191            commandLine.getErr().println(ex.getMessage());
192            if (migrator.debug) {
193                ex.printStackTrace(commandLine.getErr());
194            }
195            commandLine.usage(commandLine.getErr());
196            return commandLine.getCommandSpec().exitCodeOnExecutionException();
197        }
198    }
199
200    private static void setDebugLogLevel() {
201        final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
202        final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration");
203        logger.setLevel(Level.toLevel("DEBUG"));
204    }
205
206    @Override
207    public Integer call() throws Exception {
208
209        // Set debug log level if requested
210        if (debug) {
211            setDebugLogLevel();
212        }
213
214        if (migrationType == MigrationType.FEDORA_OCFL && !idPrefix.equals(DEFAULT_PREFIX)) {
215            throw new IllegalArgumentException("Can't change the ID Prefix for FEDORA_OCFL migrations");
216        }
217
218        if (!digestAlgorithm.equals("sha512") && !digestAlgorithm.equalsIgnoreCase("sha256")) {
219            throw new IllegalArgumentException("Invalid algorithm specified, must be one of sha512 or sha256");
220        }
221        final DigestAlgorithm algorithm = DigestAlgorithmRegistry.getAlgorithm(digestAlgorithm);
222        notNull(algorithm, "Invalid algorithm specified, must be one of sha512 or sha256");
223
224        // Pre-processing directory verification
225        notNull(targetDir, "targetDir must be provided!");
226        if (!targetDir.exists()) {
227            targetDir.mkdirs();
228        }
229
230        if (workingDir == null) {
231            LOGGER.info("No working-dir option passed in - using current directory.");
232            workingDir = new File(System.getProperty("user.dir"));
233        }
234        if (!workingDir.exists()) {
235            workingDir.mkdirs();
236        }
237        indexDir = new File(workingDir, "index");
238
239        if (migrationType == MigrationType.FEDORA_OCFL) {
240            // Fedora 6.0.0 expects a data/ocfl-root structure
241            ocflStorageDir = targetDir.toPath().resolve("data").resolve("ocfl-root").toFile();
242            if (!ocflStorageDir.exists()) {
243                ocflStorageDir.mkdirs();
244            }
245        } else {
246            ocflStorageDir = targetDir;
247        }
248
249        // Create Staging dir
250        final File ocflStagingDir = new File(workingDir, "staging");
251        if (!ocflStagingDir.exists()) {
252            ocflStagingDir.mkdirs();
253        }
254
255        // Create PID list dir
256        final File pidDir = new File(workingDir, "pid");
257        if (!pidDir.exists()) {
258            pidDir.mkdirs();
259        }
260
261        // Which F3 source are we using? - verify associated options
262        final ObjectSource objectSource;
263        InternalIDResolver idResolver = null;
264        switch (f3SourceType) {
265            case EXPORTED:
266                notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!");
267
268                objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname);
269                break;
270            case AKUBRA:
271                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
272                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
273                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
274                        f3ObjectsDir.getAbsolutePath());
275
276                idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir);
277                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
278                break;
279            case LEGACY:
280                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
281                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
282                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
283                        f3ObjectsDir.getAbsolutePath());
284
285                idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir);
286                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
287                break;
288            default:
289                throw new RuntimeException("Should never happen");
290        }
291
292        // setup HttpServer + micrometer for publishing metrics
293        final PrometheusActuator actuator = new PrometheusActuator(enableMetrics);
294        actuator.start();
295
296        final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(),
297                ocflStagingDir.toPath(), migrationType, user, userUri, algorithm, disableChecksumValidation)
298                .getObject();
299
300        final FedoraObjectVersionHandler archiveGroupHandler =
301                new ArchiveGroupHandler(
302                        ocflSessionFactory, migrationType,
303                        atomicResources ? ResourceMigrationType.ATOMIC : ResourceMigrationType.ARCHIVAL,
304                        addExtensions, deleteInactive, foxmlFile,
305                        user, idPrefix, disableChecksumValidation);
306        final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler(
307                archiveGroupHandler);
308
309        // PID-list-managers
310        // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll")
311        final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume);
312
313        // - PID-list manager
314        final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile);
315
316        final Migrator migrator = new Migrator();
317        migrator.setLimit(objectLimit);
318        migrator.setSource(objectSource);
319        migrator.setHandler(objectHandler);
320        migrator.setResumePidListManager(resumeManager);
321        migrator.setUserProvidedPidListManager(pidListManager);
322        migrator.setContinueOnError(continueOnError);
323
324        // for some reason ARQ does not implicitly init on some systems
325        ARQ.init();
326
327        try {
328            migrator.run();
329        } finally {
330            ocflSessionFactory.close();
331            if (idResolver != null) {
332                idResolver.close();
333            }
334            FileUtils.deleteDirectory(ocflStagingDir);
335            actuator.stop();
336        }
337
338        return 0;
339    }
340
341}