001/**
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 *
006 */
007package org.fcrepo.migration;
008
009import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue;
010import static edu.wisc.library.ocfl.api.util.Enforce.notNull;
011import static org.slf4j.LoggerFactory.getLogger;
012import static picocli.CommandLine.Help.Visibility.ALWAYS;
013
014import java.io.File;
015import java.util.concurrent.Callable;
016
017import org.apache.jena.query.ARQ;
018import org.fcrepo.migration.foxml.AkubraFSIDResolver;
019import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource;
020import org.fcrepo.migration.foxml.InternalIDResolver;
021import org.fcrepo.migration.foxml.LegacyFSIDResolver;
022import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource;
023import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler;
024import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler;
025import org.fcrepo.migration.metrics.PrometheusActuator;
026import org.fcrepo.migration.pidlist.ResumePidListManager;
027import org.fcrepo.migration.pidlist.UserProvidedPidListManager;
028import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
029
030import org.apache.commons.io.FileUtils;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import ch.qos.logback.classic.Level;
035import ch.qos.logback.classic.LoggerContext;
036import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
037import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
038import picocli.CommandLine;
039import picocli.CommandLine.Command;
040import picocli.CommandLine.Option;
041
042
043/**
044 * This class provides a simple CLI for running and configuring migration-utils
045 * - See README.md for usage details
046 *
047 * @author Remi Malessa
048 * @author awoods
049 * @since 2019-11-15
050 */
051@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false,
052        version = "Migration Utils - 4.4.1.b")
053public class PicocliMigrator implements Callable<Integer> {
054
055    private static final Logger LOGGER = getLogger(PicocliMigrator.class);
056
057    private enum F3SourceTypes {
058        AKUBRA, LEGACY, EXPORTED;
059
060        static F3SourceTypes toType(final String v) {
061            return valueOf(v.toUpperCase());
062        }
063    }
064
065    private final String DEFAULT_PREFIX = "info:fedora/";
066
067    @Option(names = {"--source-type", "-t"}, required = true, order = 1,
068            description = "Fedora 3 source type. Choices: akubra | legacy | exported")
069    private F3SourceTypes f3SourceType;
070
071    @Option(names = {"--datastreams-dir", "-d"}, order = 2,
072            description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')")
073    private File f3DatastreamsDir;
074
075    @Option(names = {"--objects-dir", "-o"}, order = 3,
076            description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')")
077    private File f3ObjectsDir;
078
079    @Option(names = {"--exported-dir", "-e"}, order = 4,
080            description = "Directory containing Fedora 3 export (used with --source-type 'exported')")
081    private File f3ExportedDir;
082
083    @Option(names = {"--target-dir", "-a"}, required = true, order = 5,
084            description = "OCFL storage root directory (data/ocfl-root is created for migration-type FEDORA_OCFL)")
085    private File targetDir;
086
087    @Option(names = {"--working-dir", "-i"}, order = 6,
088            description = "Directory where supporting state will be written (cached index of datastreams, ...)")
089    private File workingDir;
090
091    @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18,
092            description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.")
093    private boolean deleteInactive;
094
095    @Option(names = {"--atomic-resources", "-A"}, defaultValue = "false", showDefaultValue = ALWAYS,
096            order = 19,
097            description = "Migrate objects and datastreams as atomic resources instead of archival groups")
098    private boolean atomicResources;
099
100    @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 20,
101            description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL")
102    private MigrationType migrationType;
103
104    @Option(names = {"--id-prefix"}, defaultValue = DEFAULT_PREFIX, showDefaultValue = ALWAYS, order = 21,
105            description = "Only use this for PLAIN_OCFL migrations: Prefix to add to PIDs for OCFL object IDs"
106                + " - defaults to info:fedora/, like Fedora3")
107    private String idPrefix;
108
109    @Option(names = {"--foxml-file"}, defaultValue = "false", order = 22,
110            description = "Migrate FOXML file as a whole file, instead of creating property files. FOXML file will"
111                + " be migrated, then marked as deleted so it doesn't show up as an active file.")
112    private boolean foxmlFile;
113
114    @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 23,
115            description = "Limit number of objects to be processed.\n  Default: no limit")
116    private int objectLimit;
117
118    @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 24,
119            description = "Resume from last successfully migrated Fedora 3 object")
120    private boolean resume;
121
122    @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 25,
123            description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.")
124    private boolean continueOnError;
125
126    @Option(names = {"--pid-file", "-p"}, order = 26,
127            description = "PID file listing which Fedora 3 objects to migrate")
128    private File pidFile;
129
130    @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 27,
131            description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML")
132    private boolean addExtensions;
133
134    @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 28,
135            description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs")
136    private String f3hostname;
137
138    @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 29,
139            description = "The username to associate with all of the migrated resources.")
140    private String user;
141
142    @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS,
143            order = 30, description = "The username to associate with all of the migrated resources.")
144    private String userUri;
145
146    @Option(names = {"--algorithm"}, defaultValue = "sha512", showDefaultValue = ALWAYS, order = 31,
147            description = "The digest algorithm to use in the OCFL objects created. Either sha256 or sha512")
148    private String digestAlgorithm;
149
150    @Option(names = {"--no-checksum-validation"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 32,
151            description = "Disable validation that datastream content matches Fedora 3 checksum.")
152    private boolean disableChecksumValidation;
153
154    @Option(names = {"--enable-metrics"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 33,
155            description = "Enable gathering of metrics for a Prometheus instance. " +
156                          "\nNote: this requires port 8080 to be free in order for Prometheus to scrape metrics.")
157    private boolean enableMetrics;
158
159    @Option(names = {"--head-only", "-H"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 35,
160            description = "Migrate only the HEAD of each datastream")
161    private boolean headOnly;
162
163    @Option(names = {"--debug"}, order = 34, description = "Enables debug logging")
164    private boolean debug;
165
166    private File indexDir;
167
168    private File ocflStorageDir;
169
170    /**
171     * @param args Command line arguments
172     */
173    public static void main(final String[] args) {
174        final PicocliMigrator migrator = new PicocliMigrator();
175        final CommandLine cmd = new CommandLine(migrator);
176        cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType);
177        cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator));
178
179        cmd.execute(args);
180    }
181
182    private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler {
183
184        private final PicocliMigrator migrator;
185
186        PicoliMigrationExceptionHandler(final PicocliMigrator migrator) {
187            this.migrator = migrator;
188        }
189
190        @Override
191        public int handleExecutionException(
192                final Exception ex,
193                final CommandLine commandLine,
194                final CommandLine.ParseResult parseResult) {
195            commandLine.getErr().println(ex.getMessage());
196            if (migrator.debug) {
197                ex.printStackTrace(commandLine.getErr());
198            }
199            commandLine.usage(commandLine.getErr());
200            return commandLine.getCommandSpec().exitCodeOnExecutionException();
201        }
202    }
203
204    private static void setDebugLogLevel() {
205        final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
206        final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration");
207        logger.setLevel(Level.toLevel("DEBUG"));
208    }
209
210    @Override
211    public Integer call() throws Exception {
212
213        // Set debug log level if requested
214        if (debug) {
215            setDebugLogLevel();
216        }
217
218        if (migrationType == MigrationType.FEDORA_OCFL && !idPrefix.equals(DEFAULT_PREFIX)) {
219            throw new IllegalArgumentException("Can't change the ID Prefix for FEDORA_OCFL migrations");
220        }
221
222        if (!digestAlgorithm.equals("sha512") && !digestAlgorithm.equalsIgnoreCase("sha256")) {
223            throw new IllegalArgumentException("Invalid algorithm specified, must be one of sha512 or sha256");
224        }
225
226        if (headOnly && atomicResources) {
227            throw new IllegalArgumentException("Atomic migrations currently do not support the head only option");
228        }
229
230        final DigestAlgorithm algorithm = DigestAlgorithmRegistry.getAlgorithm(digestAlgorithm);
231        notNull(algorithm, "Invalid algorithm specified, must be one of sha512 or sha256");
232
233        // Pre-processing directory verification
234        notNull(targetDir, "targetDir must be provided!");
235        if (!targetDir.exists()) {
236            targetDir.mkdirs();
237        }
238
239        if (workingDir == null) {
240            LOGGER.info("No working-dir option passed in - using current directory.");
241            workingDir = new File(System.getProperty("user.dir"));
242        }
243        if (!workingDir.exists()) {
244            workingDir.mkdirs();
245        }
246        indexDir = new File(workingDir, "index");
247
248        if (migrationType == MigrationType.FEDORA_OCFL) {
249            // Fedora 6.0.0 expects a data/ocfl-root structure
250            ocflStorageDir = targetDir.toPath().resolve("data").resolve("ocfl-root").toFile();
251            if (!ocflStorageDir.exists()) {
252                ocflStorageDir.mkdirs();
253            }
254        } else {
255            ocflStorageDir = targetDir;
256        }
257
258        // Create Staging dir
259        final File ocflStagingDir = new File(workingDir, "staging");
260        if (!ocflStagingDir.exists()) {
261            ocflStagingDir.mkdirs();
262        }
263
264        // Create PID list dir
265        final File pidDir = new File(workingDir, "pid");
266        if (!pidDir.exists()) {
267            pidDir.mkdirs();
268        }
269
270        // Which F3 source are we using? - verify associated options
271        final ObjectSource objectSource;
272        InternalIDResolver idResolver = null;
273        switch (f3SourceType) {
274            case EXPORTED:
275                notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!");
276
277                objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname);
278                break;
279            case AKUBRA:
280                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
281                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
282                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
283                        f3ObjectsDir.getAbsolutePath());
284
285                idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir);
286                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
287                break;
288            case LEGACY:
289                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
290                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
291                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
292                        f3ObjectsDir.getAbsolutePath());
293
294                idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir);
295                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
296                break;
297            default:
298                throw new RuntimeException("Should never happen");
299        }
300
301        // setup HttpServer + micrometer for publishing metrics
302        final PrometheusActuator actuator = new PrometheusActuator(enableMetrics);
303        actuator.start();
304
305        final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(),
306                ocflStagingDir.toPath(), migrationType, user, userUri, algorithm, disableChecksumValidation)
307                .getObject();
308
309        final FedoraObjectVersionHandler archiveGroupHandler =
310                new ArchiveGroupHandler(
311                        ocflSessionFactory, migrationType,
312                        atomicResources ? ResourceMigrationType.ATOMIC : ResourceMigrationType.ARCHIVAL,
313                        addExtensions, deleteInactive, foxmlFile,
314                        user, idPrefix, headOnly, disableChecksumValidation);
315        final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler(
316                archiveGroupHandler);
317
318        // PID-list-managers
319        // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll")
320        final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume);
321
322        // - PID-list manager
323        final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile);
324
325        final Migrator migrator = new Migrator();
326        migrator.setLimit(objectLimit);
327        migrator.setSource(objectSource);
328        migrator.setHandler(objectHandler);
329        migrator.setResumePidListManager(resumeManager);
330        migrator.setUserProvidedPidListManager(pidListManager);
331        migrator.setContinueOnError(continueOnError);
332
333        // for some reason ARQ does not implicitly init on some systems
334        ARQ.init();
335
336        try {
337            migrator.run();
338        } finally {
339            ocflSessionFactory.close();
340            if (idResolver != null) {
341                idResolver.close();
342            }
343            FileUtils.deleteDirectory(ocflStagingDir);
344            actuator.stop();
345        }
346
347        return 0;
348    }
349
350}