001/*
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.migration;
017
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.LoggerContext;
import org.apache.commons.io.FileUtils;
import org.fcrepo.migration.foxml.AkubraFSIDResolver;
import org.fcrepo.migration.foxml.ArchiveExportedFoxmlDirectoryObjectSource;
import org.fcrepo.migration.foxml.InternalIDResolver;
import org.fcrepo.migration.foxml.LegacyFSIDResolver;
import org.fcrepo.migration.foxml.NativeFoxmlDirectoryObjectSource;
import org.fcrepo.migration.handlers.ObjectAbstractionStreamingFedoraObjectHandler;
import org.fcrepo.migration.handlers.VersionAbstractionFedoraObjectHandler;
import org.fcrepo.migration.handlers.ocfl.ArchiveGroupHandler;
import org.fcrepo.migration.pidlist.PidListManager;
import org.fcrepo.migration.pidlist.ResumePidListManager;
import org.fcrepo.migration.pidlist.UserProvidedPidListManager;
import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;

import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.Callable;

import static edu.wisc.library.ocfl.api.util.Enforce.expressionTrue;
import static edu.wisc.library.ocfl.api.util.Enforce.notNull;
import static org.slf4j.LoggerFactory.getLogger;
import static picocli.CommandLine.Help.Visibility.ALWAYS;
048
049
050/**
051 * This class provides a simple CLI for running and configuring migration-utils
052 * - See README.md for usage details
053 *
054 * @author Remi Malessa
055 * @author awoods
056 * @since 2019-11-15
057 */
058@Command(name = "migration-utils", mixinStandardHelpOptions = true, sortOptions = false,
059        version = "Migration Utils - 4.4.1.b")
060public class PicocliMigrator implements Callable<Integer> {
061
062    private static final Logger LOGGER = getLogger(PicocliMigrator.class);
063
064    private enum F3SourceTypes {
065        AKUBRA, LEGACY, EXPORTED;
066
067        static F3SourceTypes toType(final String v) {
068            return valueOf(v.toUpperCase());
069        }
070    }
071
072    @Option(names = {"--source-type", "-t"}, required = true, order = 1,
073            description = "Fedora 3 source type. Choices: akubra | legacy | exported")
074    private F3SourceTypes f3SourceType;
075
076    @Option(names = {"--datastreams-dir", "-d"}, order = 2,
077            description = "Directory containing Fedora 3 datastreams (used with --source-type 'akubra' or 'legacy')")
078    private File f3DatastreamsDir;
079
080    @Option(names = {"--objects-dir", "-o"}, order = 3,
081            description = "Directory containing Fedora 3 objects (used with --source-type 'akubra' or 'legacy')")
082    private File f3ObjectsDir;
083
084    @Option(names = {"--exported-dir", "-e"}, order = 4,
085            description = "Directory containing Fedora 3 export (used with --source-type 'exported')")
086    private File f3ExportedDir;
087
088    @Option(names = {"--target-dir", "-a"}, required = true, order = 5,
089            description = "Directory where OCFL storage root and supporting state will be written")
090    private File targetDir;
091
092    @Option(names = {"--delete-inactive", "-I"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 18,
093            description = "Migrate objects and datastreams in the Inactive state as deleted. Default: false.")
094    private boolean deleteInactive;
095
096    @Option(names = {"--migration-type", "-m"}, defaultValue = "FEDORA_OCFL", showDefaultValue = ALWAYS, order = 19,
097            description = "Type of OCFL objects to migrate to. Choices: FEDORA_OCFL | PLAIN_OCFL")
098    private MigrationType migrationType;
099
100    @Option(names = {"--limit", "-l"}, defaultValue = "-1", order = 21,
101            description = "Limit number of objects to be processed.\n  Default: no limit")
102    private int objectLimit;
103
104    @Option(names = {"--resume", "-r"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 22,
105            description = "Resume from last successfully migrated Fedora 3 object")
106    private boolean resume;
107
108    @Option(names = {"--continue-on-error", "-c"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 23,
109            description = "Continue to next PID if an error occurs (instead of exiting). Disabled by default.")
110    private boolean continueOnError;
111
112    @Option(names = {"--pid-file", "-p"}, order = 24,
113            description = "PID file listing which Fedora 3 objects to migrate")
114    private File pidFile;
115
116    @Option(names = {"--index-dir", "-i"}, order = 25,
117            description = "Directory where cached index of datastreams (will reuse index if already exists)")
118    private File indexDir;
119
120    @Option(names = {"--extensions", "-x"}, defaultValue = "false", showDefaultValue = ALWAYS, order = 26,
121            description = "Add file extensions to migrated datastreams based on mimetype recorded in FOXML")
122    private boolean addExtensions;
123
124    @Option(names = {"--f3hostname", "-f"}, defaultValue = "fedora.info", showDefaultValue = ALWAYS, order = 27,
125            description = "Hostname of Fedora 3, used for replacing placeholder in 'E' and 'R' datastream URLs")
126    private String f3hostname;
127
128    @Option(names = {"--username", "-u"}, defaultValue = "fedoraAdmin", showDefaultValue = ALWAYS, order = 28,
129            description = "The username to associate with all of the migrated resources.")
130    private String user;
131
132    @Option(names = {"--user-uri", "-U"}, defaultValue = "info:fedora/fedoraAdmin", showDefaultValue = ALWAYS,
133            order = 29, description = "The username to associate with all of the migrated resources.")
134    private String userUri;
135
136    @Option(names = {"--debug"}, order = 30, description = "Enables debug logging")
137    private boolean debug;
138
139
140    /**
141     * @param args Command line arguments
142     */
143    public static void main(final String[] args) {
144        final PicocliMigrator migrator = new PicocliMigrator();
145        final CommandLine cmd = new CommandLine(migrator);
146        cmd.registerConverter(F3SourceTypes.class, F3SourceTypes::toType);
147        cmd.setExecutionExceptionHandler(new PicoliMigrationExceptionHandler(migrator));
148
149        cmd.execute(args);
150    }
151
152    private static class PicoliMigrationExceptionHandler implements CommandLine.IExecutionExceptionHandler {
153
154        private final PicocliMigrator migrator;
155
156        PicoliMigrationExceptionHandler(final PicocliMigrator migrator) {
157            this.migrator = migrator;
158        }
159
160        @Override
161        public int handleExecutionException(
162                final Exception ex,
163                final CommandLine commandLine,
164                final CommandLine.ParseResult parseResult) {
165            commandLine.getErr().println(ex.getMessage());
166            if (migrator.debug) {
167                ex.printStackTrace(commandLine.getErr());
168            }
169            commandLine.usage(commandLine.getErr());
170            return commandLine.getCommandSpec().exitCodeOnExecutionException();
171        }
172    }
173
174    private static void setDebugLogLevel() {
175        final LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
176        final ch.qos.logback.classic.Logger logger = loggerContext.getLogger("org.fcrepo.migration");
177        logger.setLevel(Level.toLevel("DEBUG"));
178    }
179
180    @Override
181    public Integer call() throws Exception {
182
183        // Set debug log level if requested
184        if (debug) {
185            setDebugLogLevel();
186        }
187
188        // Pre-processing directory verification
189        notNull(targetDir, "targetDir must be provided!");
190        if (!targetDir.exists()) {
191            targetDir.mkdirs();
192        }
193
194        // Create OCFL Storage dir
195        final File ocflStorageDir = new File(targetDir, "ocfl");
196        if (!ocflStorageDir.exists()) {
197            ocflStorageDir.mkdirs();
198        }
199
200        // Create Staging dir
201        final File ocflStagingDir = new File(targetDir, "staging");
202        if (!ocflStagingDir.exists()) {
203            ocflStagingDir.mkdirs();
204        }
205
206        // Create PID list dir
207        final File pidDir = new File(targetDir, "pid");
208        if (!pidDir.exists()) {
209            pidDir.mkdirs();
210        }
211
212        // Which F3 source are we using? - verify associated options
213        ObjectSource objectSource;
214        InternalIDResolver idResolver;
215        switch (f3SourceType) {
216            case EXPORTED:
217                notNull(f3ExportedDir, "f3ExportDir must be used with 'exported' source!");
218
219                objectSource = new ArchiveExportedFoxmlDirectoryObjectSource(f3ExportedDir, f3hostname);
220                break;
221            case AKUBRA:
222                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
223                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
224                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
225                        f3ObjectsDir.getAbsolutePath());
226
227                idResolver = new AkubraFSIDResolver(indexDir, f3DatastreamsDir);
228                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
229                break;
230            case LEGACY:
231                notNull(f3DatastreamsDir, "f3DatastreamsDir must be used with 'akubra' or 'legacy' source!");
232                notNull(f3ObjectsDir, "f3ObjectsDir must be used with 'akubra' or 'legacy' source!");
233                expressionTrue(f3ObjectsDir.exists(), f3ObjectsDir, "f3ObjectsDir must exist! " +
234                        f3ObjectsDir.getAbsolutePath());
235
236                idResolver = new LegacyFSIDResolver(indexDir, f3DatastreamsDir);
237                objectSource = new NativeFoxmlDirectoryObjectSource(f3ObjectsDir, idResolver, f3hostname);
238                break;
239            default:
240                throw new RuntimeException("Should never happen");
241        }
242
243        final OcflObjectSessionFactory ocflSessionFactory = new OcflSessionFactoryFactoryBean(ocflStorageDir.toPath(),
244                ocflStagingDir.toPath(), migrationType, user, userUri).getObject();
245
246        final FedoraObjectVersionHandler archiveGroupHandler =
247                new ArchiveGroupHandler(ocflSessionFactory, migrationType, addExtensions, deleteInactive, user);
248        final FedoraObjectHandler versionHandler = new VersionAbstractionFedoraObjectHandler(archiveGroupHandler);
249        final StreamingFedoraObjectHandler objectHandler = new ObjectAbstractionStreamingFedoraObjectHandler(
250                versionHandler);
251
252        // PID-list-managers
253        // - Resume PID manager: the second arg is "acceptAll". If resuming, we do not "acceptAll")
254        final ResumePidListManager resumeManager = new ResumePidListManager(pidDir, !resume);
255
256        // - PID-list manager
257        final UserProvidedPidListManager pidListManager = new UserProvidedPidListManager(pidFile);
258
259        final List<PidListManager> pidListManagerList = Arrays.asList(pidListManager, resumeManager);
260
261        final Migrator migrator = new Migrator();
262        migrator.setLimit(objectLimit);
263        migrator.setSource(objectSource);
264        migrator.setHandler(objectHandler);
265        migrator.setPidListManagers(pidListManagerList);
266        migrator.setContinueOnError(continueOnError);
267
268        try {
269            migrator.run();
270        } finally {
271            ocflSessionFactory.close();
272            FileUtils.deleteDirectory(ocflStagingDir);
273        }
274
275        return 0;
276    }
277
278}