001/**
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 *
006 */
007package org.fcrepo.migration;
008
009import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue;
010import static edu.wisc.library.ocfl.api.util.Enforce.notNull;
011import static org.slf4j.LoggerFactory.getLogger;
012import static picocli.CommandLine.Help.Visibility.ALWAYS;
013
014import java.io.File;
015import java.util.concurrent.Callable;
016
017import org.apache.jena.query.ARQ;
018import org.fcrepo.migration.foxml.AkubraFSIDResolver;
019import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource;
020import org.fcrepo.migration.foxml.InternalIDResolver;
021import org.fcrepo.migration.foxml.LegacyFSIDResolver;
022import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource;
023import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler;
024import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler;
025import org.fcrepo.migration.metrics.PrometheusActuator;
026import org.fcrepo.migration.pidlist.ResumePidListManager;
027import org.fcrepo.migration.pidlist.UserProvidedPidListManager;
028import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
029
030import org.apache.commons.io.FileUtils;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import ch.qos.logback.classic.Level;
035import ch.qos.logback.classic.LoggerContext;
036import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
037import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
038import picocli.CommandLine;
039import picocli.CommandLine.Command;
040import picocli.CommandLine.Option;
041
042
043/**
044 * This class provides a simple CLI for running and configuring migration-utils
045 * - See README.md for usage details
046 *
047 * @author Remi Malessa
048 * @author awoods
049 * @since 2019-11-15
050 */
051@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false,
052        version = "Migration Utils - 4.4.1.b")
053public class PicocliMigrator implements Callable<Integer> {
054
055    private static final Logger LOGGER = getLogger(PicocliMigrator.class);
056
057    private enum F3SourceTypes {
058        AKUBRA, LEGACY, EXPORTED;
059
060        static F3SourceTypes toType(final String v) {
061            return valueOf(v.toUpperCase());
062        }
063    }
064
065    private final String DEFAULT_PREFIX = "info:fedora/";
066
067    @Option(names = {"--source-type", "-t"}, required = true, order = 1,
068            description = "Fedora 3 source type. Choices: akubra | legacy | exported")
069    private F3SourceTypes f3SourceType;
070
071    @Option(names = {"--datastreams-dir", "-d"}, order = 2,
072            description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')")
073    private File f3DatastreamsDir;
074
075    @Option(names = {"--objects-dir", "-o"}, order = 3,
076            description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')")
077    private File f3ObjectsDir;
078
079    @Option(names = {"--exported-dir", "-e"}, order = 4,
080            description = "Directory containing Fedora 3 export (used with --source-type 'exported')")
081    private File f3ExportedDir;
082
083    @Option(names = {"--target-dir", "-a"}, required = true, order = 5,
084            description = "OCFL storage root directory (data/ocfl-root is created for migration-type FEDORA_OCFL)")
085    private File targetDir;
086
087    @Option(names = {"--working-dir", "-i"}, order = 6,
088            description = "Directory where supporting state will be written (cached index of datastreams, ...)")
089    private File workingDir;
090
091    @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18,
092            description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.")
093    private boolean deleteInactive;
094
095    @Option(names = {"--atomic-resources", "-A"}, defaultValue = "false", showDefaultValue = ALWAYS,
096            order = 19,
097            description = "Migrate objects and datastreams as atomic resources instead of archival groups")
098    private boolean atomicResources;
099
100    @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 20,
101            description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL")
102    private MigrationType migrationType;
103
104    @Option(names = {"--id-prefix"}, defaultValue = DEFAULT_PREFIX, showDefaultValue = ALWAYS, order = 21,
105            description = "Only use this for PLAIN_OCFL migrations: Prefix to add to PIDs for OCFL object IDs"
106                + " - defaults to info:fedora/, like Fedora3")
107    private String idPrefix;
108
109    @Option(names = {"--foxml-file"}, defaultValue = "false", order = 22,
110            description = "Migrate FOXML file as a whole file, instead of creating property files. FOXML file will"
111                + " be migrated, then marked as deleted so it doesn't show up as an active file.")
112    private boolean foxmlFile;
113
114    @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 23,
115            description = "Limit number of objects to be processed.\n  Default: no limit")
116    private int objectLimit;
117
118    @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 24,
119            description = "Resume from last successfully migrated Fedora 3 object")
120    private boolean resume;
121
122    @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 25,
123            description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.")
124    private boolean continueOnError;
125
126    @Option(names = {"--pid-file", "-p"}, order = 26,
127            description = "PID file listing which Fedora 3 objects to migrate")
128    private File pidFile;
129
130    @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 27,
131            description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML")
132    private boolean addExtensions;
133
134    @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 28,
135            description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs")
136    private String f3hostname;
137
138    @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 29,
139            description = "The username to associate with all of the migrated resources.")
140    private String user;
141
142    @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS,
143            order = 30, description = "The username to associate with all of the migrated resources.")
144    private String userUri;
145
146    @Option(names = {"--algorithm"}, defaultValue = "sha512", showDefaultValue = ALWAYS, order = 31,
147            description = "The digest algorithm to use in the OCFL objects created. Either sha256 or sha512")
148    private String digestAlgorithm;
149
150    @Option(names = {"--no-checksum-validation"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 32,
151            description = "Disable validation that datastream content matches Fedora 3 checksum.")
152    private boolean disableChecksumValidation;
153
154    @Option(names = {"--enable-metrics"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 33,
155            description = "Enable gathering of metrics for a Prometheus instance. " +
156                          "\nNote: this requires port 8080 to be free in order for Prometheus to scrape metrics.")
157    private boolean enableMetrics;
158
159    @Option(names = {"--disable-dc"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 36,
160            description = "Disable migrating DC datastream into RDF object properties. ")
161    private boolean disableDc;
162
163
164    @Option(names = {"--head-only", "-H"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 35,
165            description = "Migrate only the HEAD of each datastream")
166    private boolean headOnly;
167
168    @Option(names = {"--debug"}, order = 34, description = "Enables debug logging")
169    private boolean debug;
170
171    private File indexDir;
172
173    private File ocflStorageDir;
174
175    /**
176     * @param args Command line arguments
177     */
178    public static void main(final String[] args) {
179        final PicocliMigrator migrator = new PicocliMigrator();
180        final CommandLine cmd = new CommandLine(migrator);
181        cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType);
182        cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator));
183
184        cmd.execute(args);
185    }
186
187    private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler {
188
189        private final PicocliMigrator migrator;
190
191        PicoliMigrationExceptionHandler(final PicocliMigrator migrator) {
192            this.migrator = migrator;
193        }
194
195        @Override
196        public int handleExecutionException(
197                final Exception ex,
198                final CommandLine commandLine,
199                final CommandLine.ParseResult parseResult) {
200            commandLine.getErr().println(ex.getMessage());
201            if (migrator.debug) {
202                ex.printStackTrace(commandLine.getErr());
203            }
204            commandLine.usage(commandLine.getErr());
205            return commandLine.getCommandSpec().exitCodeOnExecutionException();
206        }
207    }
208
209    private static void setDebugLogLevel() {
210        final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
211        final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration");
212        logger.setLevel(Level.toLevel("DEBUG"));
213    }
214
215    @Override
216    public Integer call() throws Exception {
217
218        // Set debug log level if requested
219        if (debug) {
220            setDebugLogLevel();
221        }
222
223        if (migrationType == MigrationType.FEDORA_OCFL && !idPrefix.equals(DEFAULT_PREFIX)) {
224            throw new IllegalArgumentException("Can't change the ID Prefix for FEDORA_OCFL migrations");
225        }
226
227        if (!digestAlgorithm.equals("sha512") && !digestAlgorithm.equalsIgnoreCase("sha256")) {
228            throw new IllegalArgumentException("Invalid algorithm specified, must be one of sha512 or sha256");
229        }
230
231        if (headOnly && atomicResources) {
232            throw new IllegalArgumentException("Atomic migrations currently do not support the head only option");
233        }
234
235        final DigestAlgorithm algorithm = DigestAlgorithmRegistry.getAlgorithm(digestAlgorithm);
236        notNull(algorithm, "Invalid algorithm specified, must be one of sha512 or sha256");
237
238        // Pre-processing directory verification
239        notNull(targetDir, "targetDir must be provided!");
240        if (!targetDir.exists()) {
241            targetDir.mkdirs();
242        }
243
244        if (workingDir == null) {
245            LOGGER.info("No working-dir option passed in - using current directory.");
246            workingDir = new File(System.getProperty("user.dir"));
247        }
248        if (!workingDir.exists()) {
249            workingDir.mkdirs();
250        }
251        indexDir = new File(workingDir, "index");
252
253        if (migrationType == MigrationType.FEDORA_OCFL) {
254            // Fedora 6.0.0 expects a data/ocfl-root structure
255            ocflStorageDir = targetDir.toPath().resolve("data").resolve("ocfl-root").toFile();
256            if (!ocflStorageDir.exists()) {
257                ocflStorageDir.mkdirs();
258            }
259        } else {
260            ocflStorageDir = targetDir;
261        }
262
263        // Create Staging dir
264        final File ocflStagingDir = new File(workingDir, "staging");
265        if (!ocflStagingDir.exists()) {
266            ocflStagingDir.mkdirs();
267        }
268
269        // Create PID list dir
270        final File pidDir = new File(workingDir, "pid");
271        if (!pidDir.exists()) {
272            pidDir.mkdirs();
273        }
274
275        // Which F3 source are we using? - verify associated options
276        final ObjectSource objectSource;
277        InternalIDResolver idResolver = null;
278        switch (f3SourceType) {
279            case EXPORTED:
280                notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!");
281
282                objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname);
283                break;
284            case AKUBRA:
285                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
286                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
287                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
288                        f3ObjectsDir.getAbsolutePath());
289
290                idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir);
291                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
292                break;
293            case LEGACY:
294                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
295                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
296                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
297                        f3ObjectsDir.getAbsolutePath());
298
299                idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir);
300                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
301                break;
302            default:
303                throw new RuntimeException("Should never happen");
304        }
305
306        // setup HttpServer + micrometer for publishing metrics
307        final PrometheusActuator actuator = new PrometheusActuator(enableMetrics);
308        actuator.start();
309
310        final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(),
311                ocflStagingDir.toPath(), migrationType, user, userUri, algorithm, disableChecksumValidation)
312                .getObject();
313
314        final FedoraObjectVersionHandler archiveGroupHandler =
315                new ArchiveGroupHandler(
316                        ocflSessionFactory, migrationType,
317                        atomicResources ? ResourceMigrationType.ATOMIC : ResourceMigrationType.ARCHIVAL,
318                        addExtensions, deleteInactive, foxmlFile,
319                        user, idPrefix, headOnly, disableChecksumValidation, disableDc);
320        final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler(
321                archiveGroupHandler);
322
323        // PID-list-managers
324        // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll")
325        final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume);
326
327        // - PID-list manager
328        final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile);
329
330        final Migrator migrator = new Migrator();
331        migrator.setLimit(objectLimit);
332        migrator.setSource(objectSource);
333        migrator.setHandler(objectHandler);
334        migrator.setResumePidListManager(resumeManager);
335        migrator.setUserProvidedPidListManager(pidListManager);
336        migrator.setContinueOnError(continueOnError);
337
338        // for some reason ARQ does not implicitly init on some systems
339        ARQ.init();
340
341        try {
342            migrator.run();
343        } finally {
344            ocflSessionFactory.close();
345            if (idResolver != null) {
346                idResolver.close();
347            }
348            FileUtils.deleteDirectory(ocflStagingDir);
349            actuator.stop();
350        }
351
352        return 0;
353    }
354
355}