001/*
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.migration;
017
018import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue;
019import static edu.wisc.library.ocfl.api.util.Enforce.notNull;
020import static org.slf4j.LoggerFactory.getLogger;
021import static picocli.CommandLine.Help.Visibility.ALWAYS;
022
023import java.io.File;
024import java.util.Arrays;
025import java.util.List;
026import java.util.concurrent.Callable;
027
028import org.fcrepo.migration.foxml.AkubraFSIDResolver;
029import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource;
030import org.fcrepo.migration.foxml.InternalIDResolver;
031import org.fcrepo.migration.foxml.LegacyFSIDResolver;
032import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource;
033import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler;
034import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler;
035import org.fcrepo.migration.pidlist.PidListManager;
036import org.fcrepo.migration.pidlist.ResumePidListManager;
037import org.fcrepo.migration.pidlist.UserProvidedPidListManager;
038import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
039
040import org.apache.commons.io.FileUtils;
041import org.slf4j.Logger;
042import org.slf4j.LoggerFactory;
043
044import ch.qos.logback.classic.Level;
045import ch.qos.logback.classic.LoggerContext;
046import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
047import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
048import picocli.CommandLine;
049import picocli.CommandLine.Command;
050import picocli.CommandLine.Option;
051
052
053/**
054 * This class provides a simple CLI for running and configuring migration-utils
055 * - See README.md for usage details
056 *
057 * @author Remi Malessa
058 * @author awoods
059 * @since 2019-11-15
060 */
061@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false,
062        version = "Migration Utils - 4.4.1.b")
063public class PicocliMigrator implements Callable<Integer> {
064
065    private static final Logger LOGGER = getLogger(PicocliMigrator.class);
066
067    private enum F3SourceTypes {
068        AKUBRA, LEGACY, EXPORTED;
069
070        static F3SourceTypes toType(final String v) {
071            return valueOf(v.toUpperCase());
072        }
073    }
074
075    private final String DEFAULT_PREFIX = "info:fedora/";
076
077    @Option(names = {"--source-type", "-t"}, required = true, order = 1,
078            description = "Fedora 3 source type. Choices: akubra | legacy | exported")
079    private F3SourceTypes f3SourceType;
080
081    @Option(names = {"--datastreams-dir", "-d"}, order = 2,
082            description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')")
083    private File f3DatastreamsDir;
084
085    @Option(names = {"--objects-dir", "-o"}, order = 3,
086            description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')")
087    private File f3ObjectsDir;
088
089    @Option(names = {"--exported-dir", "-e"}, order = 4,
090            description = "Directory containing Fedora 3 export (used with --source-type 'exported')")
091    private File f3ExportedDir;
092
093    @Option(names = {"--target-dir", "-a"}, required = true, order = 5,
094            description = "OCFL storage root directory (data/ocfl-root is created for migration-type FEDORA_OCFL)")
095    private File targetDir;
096
097    @Option(names = {"--working-dir", "-i"}, order = 6,
098            description = "Directory where supporting state will be written (cached index of datastreams, ...)")
099    private File workingDir;
100
101    @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18,
102            description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.")
103    private boolean deleteInactive;
104
105    @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 19,
106            description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL")
107    private MigrationType migrationType;
108
109    @Option(names = {"--id-prefix"}, defaultValue = DEFAULT_PREFIX, showDefaultValue = ALWAYS, order = 20,
110            description = "Only use this for PLAIN_OCFL migrations: Prefix to add to PIDs for OCFL object IDs"
111                + " - defaults to info:fedora/, like Fedora3")
112    private String idPrefix;
113
114    @Option(names = {"--foxml-file"}, defaultValue = "false", order = 21,
115            description = "Migrate FOXML file as a whole file, instead of creating property files. FOXML file will"
116                + " be migrated, then marked as deleted so it doesn't show up as an active file.")
117    private boolean foxmlFile;
118
119    @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 22,
120            description = "Limit number of objects to be processed.\n  Default: no limit")
121    private int objectLimit;
122
123    @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 23,
124            description = "Resume from last successfully migrated Fedora 3 object")
125    private boolean resume;
126
127    @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 24,
128            description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.")
129    private boolean continueOnError;
130
131    @Option(names = {"--pid-file", "-p"}, order = 25,
132            description = "PID file listing which Fedora 3 objects to migrate")
133    private File pidFile;
134
135    @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 26,
136            description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML")
137    private boolean addExtensions;
138
139    @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 27,
140            description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs")
141    private String f3hostname;
142
143    @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 28,
144            description = "The username to associate with all of the migrated resources.")
145    private String user;
146
147    @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS,
148            order = 29, description = "The username to associate with all of the migrated resources.")
149    private String userUri;
150
151    @Option(names = {"--algorithm"}, defaultValue = "sha512", showDefaultValue = ALWAYS, order = 30,
152            description = "The digest algorithm to use in the OCFL objects created. Either sha256 or sha512")
153    private String digestAlgorithm;
154
155    @Option(names = {"--no-checksum-validation"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 31,
156            description = "Disable validation that datastream content matches Fedora 3 checksum.")
157    private boolean disableChecksumValidation;
158
159    @Option(names = {"--debug"}, order = 32, description = "Enables debug logging")
160    private boolean debug;
161
162    private File indexDir;
163
164    private File ocflStorageDir;
165
166    /**
167     * @param args Command line arguments
168     */
169    public static void main(final String[] args) {
170        final PicocliMigrator migrator = new PicocliMigrator();
171        final CommandLine cmd = new CommandLine(migrator);
172        cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType);
173        cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator));
174
175        cmd.execute(args);
176    }
177
178    private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler {
179
180        private final PicocliMigrator migrator;
181
182        PicoliMigrationExceptionHandler(final PicocliMigrator migrator) {
183            this.migrator = migrator;
184        }
185
186        @Override
187        public int handleExecutionException(
188                final Exception ex,
189                final CommandLine commandLine,
190                final CommandLine.ParseResult parseResult) {
191            commandLine.getErr().println(ex.getMessage());
192            if (migrator.debug) {
193                ex.printStackTrace(commandLine.getErr());
194            }
195            commandLine.usage(commandLine.getErr());
196            return commandLine.getCommandSpec().exitCodeOnExecutionException();
197        }
198    }
199
200    private static void setDebugLogLevel() {
201        final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
202        final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration");
203        logger.setLevel(Level.toLevel("DEBUG"));
204    }
205
206    @Override
207    public Integer call() throws Exception {
208
209        // Set debug log level if requested
210        if (debug) {
211            setDebugLogLevel();
212        }
213
214        if (migrationType == MigrationType.FEDORA_OCFL && !idPrefix.equals(DEFAULT_PREFIX)) {
215            throw new IllegalArgumentException("Can't change the ID Prefix for FEDORA_OCFL migrations");
216        }
217
218        if (!digestAlgorithm.equals("sha512") && !digestAlgorithm.equalsIgnoreCase("sha256")) {
219            throw new IllegalArgumentException("Invalid algorithm specified, must be one of sha512 or sha256");
220        }
221        final DigestAlgorithm algorithm = DigestAlgorithmRegistry.getAlgorithm(digestAlgorithm);
222        notNull(algorithm, "Invalid algorithm specified, must be one of sha512 or sha256");
223
224        // Pre-processing directory verification
225        notNull(targetDir, "targetDir must be provided!");
226        if (!targetDir.exists()) {
227            targetDir.mkdirs();
228        }
229
230        if (workingDir == null) {
231            LOGGER.info("No working-dir option passed in - using current directory.");
232            workingDir = new File(System.getProperty("user.dir"));
233        }
234        if (!workingDir.exists()) {
235            workingDir.mkdirs();
236        }
237        indexDir = new File(workingDir, "index");
238
239        if (migrationType == MigrationType.FEDORA_OCFL) {
240            // Fedora 6.0.0 expects a data/ocfl-root structure
241            ocflStorageDir = targetDir.toPath().resolve("data").resolve("ocfl-root").toFile();
242            if (!ocflStorageDir.exists()) {
243                ocflStorageDir.mkdirs();
244            }
245        } else {
246            ocflStorageDir = targetDir;
247        }
248
249        // Create Staging dir
250        final File ocflStagingDir = new File(workingDir, "staging");
251        if (!ocflStagingDir.exists()) {
252            ocflStagingDir.mkdirs();
253        }
254
255        // Create PID list dir
256        final File pidDir = new File(workingDir, "pid");
257        if (!pidDir.exists()) {
258            pidDir.mkdirs();
259        }
260
261        // Which F3 source are we using? - verify associated options
262        final ObjectSource objectSource;
263        InternalIDResolver idResolver = null;
264        switch (f3SourceType) {
265            case EXPORTED:
266                notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!");
267
268                objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname);
269                break;
270            case AKUBRA:
271                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
272                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
273                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
274                        f3ObjectsDir.getAbsolutePath());
275
276                idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir);
277                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
278                break;
279            case LEGACY:
280                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
281                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
282                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
283                        f3ObjectsDir.getAbsolutePath());
284
285                idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir);
286                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
287                break;
288            default:
289                throw new RuntimeException("Should never happen");
290        }
291
292        final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(),
293                ocflStagingDir.toPath(), migrationType, user, userUri, algorithm).getObject();
294
295        final FedoraObjectVersionHandler archiveGroupHandler =
296                new ArchiveGroupHandler(ocflSessionFactory, migrationType, addExtensions, deleteInactive, foxmlFile,
297                        user, idPrefix, disableChecksumValidation);
298        final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler(
299                archiveGroupHandler);
300
301        // PID-list-managers
302        // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll")
303        final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume);
304
305        // - PID-list manager
306        final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile);
307
308        final List<PidListManager> pidListManagerList = Arrays.asList(pidListManager, resumeManager);
309
310        final Migrator migrator = new Migrator();
311        migrator.setLimit(objectLimit);
312        migrator.setSource(objectSource);
313        migrator.setHandler(objectHandler);
314        migrator.setPidListManagers(pidListManagerList);
315        migrator.setContinueOnError(continueOnError);
316
317        try {
318            migrator.run();
319        } finally {
320            ocflSessionFactory.close();
321            if (idResolver != null) {
322                idResolver.close();
323            }
324            FileUtils.deleteDirectory(ocflStagingDir);
325        }
326
327        return 0;
328    }
329
330}