001/*
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.migration;
017
018import ch.qos.logback.classic.Level;
019import ch.qos.logback.classic.LoggerContext;
020import org.apache.commons.io.FileUtils;
021import org.fcrepo.migration.foxml.AkubraFSIDResolver;
022import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource;
023import org.fcrepo.migration.foxml.InternalIDResolver;
024import org.fcrepo.migration.foxml.LegacyFSIDResolver;
025import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource;
026import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler;
027import org.fcrepo.migration.handlers.VersionAbstractionFedoraObjectHandler;
028import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler;
029import org.fcrepo.migration.pidlist.PidListManager;
030import org.fcrepo.migration.pidlist.ResumePidListManager;
031import org.fcrepo.migration.pidlist.UserProvidedPidListManager;
032import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035import picocli.CommandLine;
036import picocli.CommandLine.Command;
037import picocli.CommandLine.Option;
038
039import java.io.File;
040import java.util.Arrays;
041import java.util.List;
042import java.util.concurrent.Callable;
043
044import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue;
045import static edu.wisc.library.ocfl.api.util.Enforce.notNull;
046import static org.slf4j.LoggerFactory.getLogger;
047import static picocli.CommandLine.Help.Visibility.ALWAYS;
048
049
050/**
051 * This class provides a simple CLI for running and configuring migration-utils
052 * - See README.md for usage details
053 *
054 * @author Remi Malessa
055 * @author awoods
056 * @since 2019-11-15
057 */
058@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false,
059        version = "Migration Utils - 4.4.1.b")
060public class PicocliMigrator implements Callable<Integer> {
061
062    private static final Logger LOGGER = getLogger(PicocliMigrator.class);
063
064    private enum F3SourceTypes {
065        AKUBRA, LEGACY, EXPORTED;
066
067        static F3SourceTypes toType(final String v) {
068            return valueOf(v.toUpperCase());
069        }
070    }
071
072    @Option(names = {"--source-type", "-t"}, required = true, order = 1,
073            description = "Fedora 3 source type. Choices: akubra | legacy | exported")
074    private F3SourceTypes f3SourceType;
075
076    @Option(names = {"--datastreams-dir", "-d"}, order = 2,
077            description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')")
078    private File f3DatastreamsDir;
079
080    @Option(names = {"--objects-dir", "-o"}, order = 3,
081            description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')")
082    private File f3ObjectsDir;
083
084    @Option(names = {"--exported-dir", "-e"}, order = 4,
085            description = "Directory containing Fedora 3 export (used with --source-type 'exported')")
086    private File f3ExportedDir;
087
088    @Option(names = {"--target-dir", "-a"}, required = true, order = 5,
089            description = "Directory where OCFL storage root and supporting state will be written")
090    private File targetDir;
091
092    @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18,
093            description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.")
094    private boolean deleteInactive;
095
096    @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 19,
097            description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL")
098    private MigrationType migrationType;
099
100    @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 21,
101            description = "Limit number of objects to be processed.\n  Default: no limit")
102    private int objectLimit;
103
104    @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 22,
105            description = "Resume from last successfully migrated Fedora 3 object")
106    private boolean resume;
107
108    @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 23,
109            description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.")
110    private boolean continueOnError;
111
112    @Option(names = {"--pid-file", "-p"}, order = 24,
113            description = "PID file listing which Fedora 3 objects to migrate")
114    private File pidFile;
115
116    @Option(names = {"--index-dir", "-i"}, order = 25,
117            description = "Directory where cached index of datastreams (will reuse index if already exists)")
118    private File indexDir;
119
120    @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 26,
121            description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML")
122    private boolean addExtensions;
123
124    @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 27,
125            description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs")
126    private String f3hostname;
127
128    @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 28,
129            description = "The username to associate with all of the migrated resources.")
130    private String user;
131
132    @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS,
133            order = 29, description = "The username to associate with all of the migrated resources.")
134    private String userUri;
135
136    @Option(names = {"--debug"}, order = 30, description = "Enables debug logging")
137    private boolean debug;
138
139
140    /**
141     * @param args Command line arguments
142     */
143    public static void main(final String[] args) {
144        final PicocliMigrator migrator = new PicocliMigrator();
145        final CommandLine cmd = new CommandLine(migrator);
146        cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType);
147        cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator));
148
149        cmd.execute(args);
150    }
151
152    private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler {
153
154        private final PicocliMigrator migrator;
155
156        PicoliMigrationExceptionHandler(final PicocliMigrator migrator) {
157            this.migrator = migrator;
158        }
159
160        @Override
161        public int handleExecutionException(
162                final Exception ex,
163                final CommandLine commandLine,
164                final CommandLine.ParseResult parseResult) {
165            commandLine.getErr().println(ex.getMessage());
166            if (migrator.debug) {
167                ex.printStackTrace(commandLine.getErr());
168            }
169            commandLine.usage(commandLine.getErr());
170            return commandLine.getCommandSpec().exitCodeOnExecutionException();
171        }
172    }
173
174    private static void setDebugLogLevel() {
175        final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
176        final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration");
177        logger.setLevel(Level.toLevel("DEBUG"));
178    }
179
180    @Override
181    public Integer call() throws Exception {
182
183        // Set debug log level if requested
184        if (debug) {
185            setDebugLogLevel();
186        }
187
188        // Pre-processing directory verification
189        notNull(targetDir, "targetDir must be provided!");
190        if (!targetDir.exists()) {
191            targetDir.mkdirs();
192        }
193
194        // Fedora 6.0.0 expects a data directory at the top.
195        final File dataDir = targetDir.toPath().resolve("data").toFile();
196        if (!dataDir.exists()) {
197            dataDir.mkdirs();
198        }
199
200        // Create OCFL Storage dir
201        final File ocflStorageDir = new File(dataDir, "ocfl-root");
202        if (!ocflStorageDir.exists()) {
203            ocflStorageDir.mkdirs();
204        }
205
206        // Create Staging dir
207        final File ocflStagingDir = new File(dataDir, "staging");
208        if (!ocflStagingDir.exists()) {
209            ocflStagingDir.mkdirs();
210        }
211
212        // Create PID list dir
213        final File pidDir = new File(targetDir, "pid");
214        if (!pidDir.exists()) {
215            pidDir.mkdirs();
216        }
217
218        // Which F3 source are we using? - verify associated options
219        final ObjectSource objectSource;
220        final InternalIDResolver idResolver;
221        switch (f3SourceType) {
222            case EXPORTED:
223                notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!");
224
225                objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname);
226                break;
227            case AKUBRA:
228                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
229                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
230                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
231                        f3ObjectsDir.getAbsolutePath());
232
233                idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir);
234                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
235                break;
236            case LEGACY:
237                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
238                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
239                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
240                        f3ObjectsDir.getAbsolutePath());
241
242                idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir);
243                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
244                break;
245            default:
246                throw new RuntimeException("Should never happen");
247        }
248
249        final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(),
250                ocflStagingDir.toPath(), migrationType, user, userUri).getObject();
251
252        final FedoraObjectVersionHandler archiveGroupHandler =
253                new ArchiveGroupHandler(ocflSessionFactory, migrationType, addExtensions, deleteInactive, user);
254        final FedoraObjectHandler versionHandler = new VersionAbstractionFedoraObjectHandler(archiveGroupHandler);
255        final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler(
256                versionHandler);
257
258        // PID-list-managers
259        // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll")
260        final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume);
261
262        // - PID-list manager
263        final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile);
264
265        final List<PidListManager> pidListManagerList = Arrays.asList(pidListManager, resumeManager);
266
267        final Migrator migrator = new Migrator();
268        migrator.setLimit(objectLimit);
269        migrator.setSource(objectSource);
270        migrator.setHandler(objectHandler);
271        migrator.setPidListManagers(pidListManagerList);
272        migrator.setContinueOnError(continueOnError);
273
274        try {
275            migrator.run();
276        } finally {
277            ocflSessionFactory.close();
278            FileUtils.deleteDirectory(ocflStagingDir);
279        }
280
281        return 0;
282    }
283
284}