001/*
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.migration;
017
018import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue;
019import static edu.wisc.library.ocfl.api.util.Enforce.notNull;
020import static org.slf4j.LoggerFactory.getLogger;
021import static picocli.CommandLine.Help.Visibility.ALWAYS;
022
023import java.io.File;
024import java.util.concurrent.Callable;
025
026import org.fcrepo.migration.foxml.AkubraFSIDResolver;
027import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource;
028import org.fcrepo.migration.foxml.InternalIDResolver;
029import org.fcrepo.migration.foxml.LegacyFSIDResolver;
030import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource;
031import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler;
032import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler;
033import org.fcrepo.migration.metrics.PrometheusActuator;
034import org.fcrepo.migration.pidlist.ResumePidListManager;
035import org.fcrepo.migration.pidlist.UserProvidedPidListManager;
036import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
037
038import org.apache.commons.io.FileUtils;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import ch.qos.logback.classic.Level;
043import ch.qos.logback.classic.LoggerContext;
044import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
045import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
046import picocli.CommandLine;
047import picocli.CommandLine.Command;
048import picocli.CommandLine.Option;
049
050
051/**
052 * This class provides a simple CLI for running and configuring migration-utils
053 * - See README.md for usage details
054 *
055 * @author Remi Malessa
056 * @author awoods
057 * @since 2019-11-15
058 */
059@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false,
060        version = "Migration Utils - 4.4.1.b")
061public class PicocliMigrator implements Callable<Integer> {
062
063    private static final Logger LOGGER = getLogger(PicocliMigrator.class);
064
065    private enum F3SourceTypes {
066        AKUBRA, LEGACY, EXPORTED;
067
068        static F3SourceTypes toType(final String v) {
069            return valueOf(v.toUpperCase());
070        }
071    }
072
073    private final String DEFAULT_PREFIX = "info:fedora/";
074
075    @Option(names = {"--source-type", "-t"}, required = true, order = 1,
076            description = "Fedora 3 source type. Choices: akubra | legacy | exported")
077    private F3SourceTypes f3SourceType;
078
079    @Option(names = {"--datastreams-dir", "-d"}, order = 2,
080            description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')")
081    private File f3DatastreamsDir;
082
083    @Option(names = {"--objects-dir", "-o"}, order = 3,
084            description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')")
085    private File f3ObjectsDir;
086
087    @Option(names = {"--exported-dir", "-e"}, order = 4,
088            description = "Directory containing Fedora 3 export (used with --source-type 'exported')")
089    private File f3ExportedDir;
090
091    @Option(names = {"--target-dir", "-a"}, required = true, order = 5,
092            description = "OCFL storage root directory (data/ocfl-root is created for migration-type FEDORA_OCFL)")
093    private File targetDir;
094
095    @Option(names = {"--working-dir", "-i"}, order = 6,
096            description = "Directory where supporting state will be written (cached index of datastreams, ...)")
097    private File workingDir;
098
099    @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18,
100            description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.")
101    private boolean deleteInactive;
102
103    @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 19,
104            description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL")
105    private MigrationType migrationType;
106
107    @Option(names = {"--id-prefix"}, defaultValue = DEFAULT_PREFIX, showDefaultValue = ALWAYS, order = 20,
108            description = "Only use this for PLAIN_OCFL migrations: Prefix to add to PIDs for OCFL object IDs"
109                + " - defaults to info:fedora/, like Fedora3")
110    private String idPrefix;
111
112    @Option(names = {"--foxml-file"}, defaultValue = "false", order = 21,
113            description = "Migrate FOXML file as a whole file, instead of creating property files. FOXML file will"
114                + " be migrated, then marked as deleted so it doesn't show up as an active file.")
115    private boolean foxmlFile;
116
117    @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 22,
118            description = "Limit number of objects to be processed.\n  Default: no limit")
119    private int objectLimit;
120
121    @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 23,
122            description = "Resume from last successfully migrated Fedora 3 object")
123    private boolean resume;
124
125    @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 24,
126            description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.")
127    private boolean continueOnError;
128
129    @Option(names = {"--pid-file", "-p"}, order = 25,
130            description = "PID file listing which Fedora 3 objects to migrate")
131    private File pidFile;
132
133    @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 26,
134            description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML")
135    private boolean addExtensions;
136
137    @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 27,
138            description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs")
139    private String f3hostname;
140
141    @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 28,
142            description = "The username to associate with all of the migrated resources.")
143    private String user;
144
145    @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS,
146            order = 29, description = "The username to associate with all of the migrated resources.")
147    private String userUri;
148
149    @Option(names = {"--algorithm"}, defaultValue = "sha512", showDefaultValue = ALWAYS, order = 30,
150            description = "The digest algorithm to use in the OCFL objects created. Either sha256 or sha512")
151    private String digestAlgorithm;
152
153    @Option(names = {"--no-checksum-validation"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 31,
154            description = "Disable validation that datastream content matches Fedora 3 checksum.")
155    private boolean disableChecksumValidation;
156
157    @Option(names = {"--enable-metrics"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 32,
158            description = "Enable gathering of metrics for a Prometheus instance. " +
159                          "\nNote: this requires port 8080 to be free in order for Prometheus to scrape metrics.")
160    private boolean enableMetrics;
161
162    @Option(names = {"--debug"}, order = 32, description = "Enables debug logging")
163    private boolean debug;
164
165    private File indexDir;
166
167    private File ocflStorageDir;
168
169    /**
170     * @param args Command line arguments
171     */
172    public static void main(final String[] args) {
173        final PicocliMigrator migrator = new PicocliMigrator();
174        final CommandLine cmd = new CommandLine(migrator);
175        cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType);
176        cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator));
177
178        cmd.execute(args);
179    }
180
181    private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler {
182
183        private final PicocliMigrator migrator;
184
185        PicoliMigrationExceptionHandler(final PicocliMigrator migrator) {
186            this.migrator = migrator;
187        }
188
189        @Override
190        public int handleExecutionException(
191                final Exception ex,
192                final CommandLine commandLine,
193                final CommandLine.ParseResult parseResult) {
194            commandLine.getErr().println(ex.getMessage());
195            if (migrator.debug) {
196                ex.printStackTrace(commandLine.getErr());
197            }
198            commandLine.usage(commandLine.getErr());
199            return commandLine.getCommandSpec().exitCodeOnExecutionException();
200        }
201    }
202
203    private static void setDebugLogLevel() {
204        final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
205        final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration");
206        logger.setLevel(Level.toLevel("DEBUG"));
207    }
208
209    @Override
210    public Integer call() throws Exception {
211
212        // Set debug log level if requested
213        if (debug) {
214            setDebugLogLevel();
215        }
216
217        if (migrationType == MigrationType.FEDORA_OCFL && !idPrefix.equals(DEFAULT_PREFIX)) {
218            throw new IllegalArgumentException("Can't change the ID Prefix for FEDORA_OCFL migrations");
219        }
220
221        if (!digestAlgorithm.equals("sha512") && !digestAlgorithm.equalsIgnoreCase("sha256")) {
222            throw new IllegalArgumentException("Invalid algorithm specified, must be one of sha512 or sha256");
223        }
224        final DigestAlgorithm algorithm = DigestAlgorithmRegistry.getAlgorithm(digestAlgorithm);
225        notNull(algorithm, "Invalid algorithm specified, must be one of sha512 or sha256");
226
227        // Pre-processing directory verification
228        notNull(targetDir, "targetDir must be provided!");
229        if (!targetDir.exists()) {
230            targetDir.mkdirs();
231        }
232
233        if (workingDir == null) {
234            LOGGER.info("No working-dir option passed in - using current directory.");
235            workingDir = new File(System.getProperty("user.dir"));
236        }
237        if (!workingDir.exists()) {
238            workingDir.mkdirs();
239        }
240        indexDir = new File(workingDir, "index");
241
242        if (migrationType == MigrationType.FEDORA_OCFL) {
243            // Fedora 6.0.0 expects a data/ocfl-root structure
244            ocflStorageDir = targetDir.toPath().resolve("data").resolve("ocfl-root").toFile();
245            if (!ocflStorageDir.exists()) {
246                ocflStorageDir.mkdirs();
247            }
248        } else {
249            ocflStorageDir = targetDir;
250        }
251
252        // Create Staging dir
253        final File ocflStagingDir = new File(workingDir, "staging");
254        if (!ocflStagingDir.exists()) {
255            ocflStagingDir.mkdirs();
256        }
257
258        // Create PID list dir
259        final File pidDir = new File(workingDir, "pid");
260        if (!pidDir.exists()) {
261            pidDir.mkdirs();
262        }
263
264        // Which F3 source are we using? - verify associated options
265        final ObjectSource objectSource;
266        InternalIDResolver idResolver = null;
267        switch (f3SourceType) {
268            case EXPORTED:
269                notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!");
270
271                objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname);
272                break;
273            case AKUBRA:
274                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
275                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
276                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
277                        f3ObjectsDir.getAbsolutePath());
278
279                idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir);
280                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
281                break;
282            case LEGACY:
283                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
284                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
285                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
286                        f3ObjectsDir.getAbsolutePath());
287
288                idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir);
289                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
290                break;
291            default:
292                throw new RuntimeException("Should never happen");
293        }
294
295        // setup HttpServer + micrometer for publishing metrics
296        final PrometheusActuator actuator = new PrometheusActuator(enableMetrics);
297        actuator.start();
298
299        final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(),
300                ocflStagingDir.toPath(), migrationType, user, userUri, algorithm, disableChecksumValidation)
301                .getObject();
302
303        final FedoraObjectVersionHandler archiveGroupHandler =
304                new ArchiveGroupHandler(ocflSessionFactory, migrationType, addExtensions, deleteInactive, foxmlFile,
305                        user, idPrefix, disableChecksumValidation);
306        final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler(
307                archiveGroupHandler);
308
309        // PID-list-managers
310        // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll")
311        final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume);
312
313        // - PID-list manager
314        final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile);
315
316        final Migrator migrator = new Migrator();
317        migrator.setLimit(objectLimit);
318        migrator.setSource(objectSource);
319        migrator.setHandler(objectHandler);
320        migrator.setResumePidListManager(resumeManager);
321        migrator.setUserProvidedPidListManager(pidListManager);
322        migrator.setContinueOnError(continueOnError);
323
324        try {
325            migrator.run();
326        } finally {
327            ocflSessionFactory.close();
328            if (idResolver != null) {
329                idResolver.close();
330            }
331            FileUtils.deleteDirectory(ocflStagingDir);
332            actuator.stop();
333        }
334
335        return 0;
336    }
337
338}