001package org.fcrepo.migration;
002
003import static org.junit.Assert.assertEquals;
004import static org.junit.Assert.assertTrue;
005
006import java.io.ByteArrayInputStream;
007import java.io.File;
008import java.io.FileInputStream;
009import java.io.IOException;
010import java.nio.charset.StandardCharsets;
011import java.nio.file.Files;
012import java.nio.file.Path;
013import java.security.MessageDigest;
014import java.security.NoSuchAlgorithmException;
015import java.util.ArrayList;
016import java.util.HashMap;
017import java.util.Map;
018
019import edu.wisc.library.ocfl.api.OcflRepository;
020import edu.wisc.library.ocfl.api.model.FileDetails;
021import edu.wisc.library.ocfl.api.model.ObjectVersionId;
022import edu.wisc.library.ocfl.api.model.VersionDetails;
023import edu.wisc.library.ocfl.api.model.VersionInfo;
024import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
025import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder;
026import org.apache.commons.codec.binary.Hex;
027import org.apache.commons.codec.digest.DigestUtils;
028import org.apache.commons.io.FileUtils;
029import org.apache.commons.io.IOUtils;
030import org.junit.After;
031import org.junit.Before;
032import org.junit.Test;
033
034import com.fasterxml.jackson.databind.JsonNode;
035import com.fasterxml.jackson.databind.ObjectMapper;
036
037import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
038import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleIdEncapsulationLayoutConfig;
039import picocli.CommandLine;
040
041/**
042 * @author bcail
043 */
044public class PicocliIT {
045
046    private Path tmpDir;
047    private Path targetDir;
048    private Path workingDir;
049
050    @Before
051    public void setup() throws IOException {
052        tmpDir = Files.createTempDirectory("migration-utils");
053        targetDir = tmpDir.resolve("target");
054        workingDir = tmpDir.resolve("working");
055    }
056
057    @After
058    public void tearDown() throws IOException {
059        try {
060            FileUtils.forceDelete(tmpDir.toFile());
061        } catch (final IOException io) {
062            System.err.println("Error cleaning up " + tmpDir.toString());
063            io.printStackTrace();
064        }
065    }
066
067    @Test
068    public void testPlainOcfl() throws Exception {
069        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
070                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
071                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
072                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
073        final PicocliMigrator migrator = new PicocliMigrator();
074        final CommandLine cmd = new CommandLine(migrator);
075
076        cmd.execute(args);
077        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
078        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
079                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
080        final File inventory = baseDir.resolve("inventory.json").toFile();
081        assertTrue(inventory.exists());
082        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
083        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
084    }
085
086    @Test
087    public void testPlainOcflEmptyIdPrefix() throws Exception {
088        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
089                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
090                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
091                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
092                "--id-prefix", ""};
093        final PicocliMigrator migrator = new PicocliMigrator();
094        final CommandLine cmd = new CommandLine(migrator);
095
096        final int result = cmd.execute(args);
097        assertEquals(0, result);
098        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
099        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
100                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
101        final File inventory = baseDir.resolve("inventory.json").toFile();
102        assertTrue(inventory.exists());
103        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
104        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
105    }
106
107    @Test
108    public void testFedoraOcflCantChangeIdPrefix() throws Exception {
109        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
110                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
111                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
112                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
113                "--id-prefix", ""};
114        final PicocliMigrator migrator = new PicocliMigrator();
115        final CommandLine cmd = new CommandLine(migrator);
116
117        final int result = cmd.execute(args);
118        assertEquals(1, result);
119    }
120
121    @Test
122    public void testPlainOcflNoWorkingDirOption() throws Exception {
123        final String[] args = {"--target-dir", targetDir.toString(),
124                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
125                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
126                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
127        final PicocliMigrator migrator = new PicocliMigrator();
128        final CommandLine cmd = new CommandLine(migrator);
129
130        cmd.execute(args);
131        final Path workingDir = Path.of(System.getProperty("user.dir"));
132        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
133        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
134        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
135    }
136
137    @Test
138    public void testFedoraOcfl() throws Exception {
139        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
140                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
141                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
142                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
143        final PicocliMigrator migrator = new PicocliMigrator();
144        final CommandLine cmd = new CommandLine(migrator);
145
146        cmd.execute(args);
147        assertTrue(Files.list(targetDir.resolve("data").resolve("ocfl-root"))
148                .anyMatch(element -> element.endsWith("0=ocfl_1.0")));
149        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
150        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
151    }
152
153    @Test
154    public void testExistingRepoDifferentStorageLayout() throws Exception {
155        //create repo with different storage layout
156        final var ocflRepo =  new OcflRepositoryBuilder()
157                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
158                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
159                .workDir(tmpDir)
160                .build();
161        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
162
163        //migrate object into it
164        final Path workingDir = tmpDir.resolve("working");
165        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
166                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
167                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
168                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
169                "--id-prefix", ""};
170        final PicocliMigrator migrator = new PicocliMigrator();
171        final CommandLine cmd = new CommandLine(migrator);
172        cmd.execute(args);
173
174        //verify that the correct storage layout was used - encapsulation directory is the encoded object id
175        assertTrue(Files.list(targetDir.resolve("750").resolve("677").resolve("e9b"))
176                .anyMatch(f -> f.endsWith("example%3a1")));
177    }
178
179    @Test
180    public void testMigrateFoxmlFileInsteadOfPropertyFiles() throws Exception {
181        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
182                "--source-type", "LEGACY", "--migration-type", "PLAIN_OCFL",
183                "--datastreams-dir", "src/test/resources/legacyFS/datastreams/2015/0430/16/01",
184                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
185                "--id-prefix", "", "--foxml-file"};
186        final PicocliMigrator migrator = new PicocliMigrator();
187        final CommandLine cmd = new CommandLine(migrator);
188        cmd.execute(args);
189
190        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
191                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
192        final File inventory = baseDir.resolve("inventory.json").toFile();
193        assertTrue(inventory.exists());
194        final var ocflRepo =  new OcflRepositoryBuilder()
195                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
196                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
197                .workDir(workingDir)
198                .build();
199        final var object = ocflRepo.getObject(ObjectVersionId.head("example:1"));
200        final ArrayList<String> files = new ArrayList<String>();
201        for (final var file: object.getFiles()) {
202            files.add(file.getPath());
203        }
204        final var expectedFiles = new ArrayList<String>();
205        expectedFiles.add("AUDIT");
206        expectedFiles.add("DS2");
207        expectedFiles.add("DS1");
208        expectedFiles.add("DS4");
209        expectedFiles.add("DS3");
210        expectedFiles.add("DC");
211        assertEquals(expectedFiles, files);
212        //now check for a FOXML, which should show up in a previous version
213        final var versions = ocflRepo.describeObject("example:1").getVersionMap().values();
214        boolean foundFoxml = false;
215        for (VersionDetails v : versions) {
216            for (FileDetails f : v.getFiles()) {
217                if (f.getPath().equals("FOXML")) {
218                    foundFoxml = true;
219                    break;
220                }
221            }
222        }
223        assertTrue(foundFoxml);
224    }
225
226    @Test
227    public void testInvalidDigestAlgorithm() throws Exception {
228        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
229                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
230                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
231                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
232                "--algorithm", "sha384"};
233        final PicocliMigrator migrator = new PicocliMigrator();
234        final CommandLine cmd = new CommandLine(migrator);
235
236        final int result = cmd.execute(args);
237        assertEquals(1, result);
238    }
239
240    /**
241     * MD5 is a supported algorithm under an OCFL extension, but we don't support it.
242     */
243    @Test
244    public void testInvalidForUsDigestAlgorithm() {
245        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
246                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
247                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
248                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
249                "--algorithm", "md5"};
250        final PicocliMigrator migrator = new PicocliMigrator();
251        final CommandLine cmd = new CommandLine(migrator);
252
253        final int result = cmd.execute(args);
254        assertEquals(1, result);
255    }
256
257    @Test
258    public void testSha256DigestAlgorithm() throws Exception {
259        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
260                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
261                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
262                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
263                "--algorithm", "sha256"};
264        final PicocliMigrator migrator = new PicocliMigrator();
265        final CommandLine cmd = new CommandLine(migrator);
266
267        final int result = cmd.execute(args);
268        assertEquals(0, result);
269        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
270        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
271                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
272        final File inventory = baseDir.resolve("inventory.json").toFile();
273        assertTrue(inventory.exists());
274        validateManifests(inventory, "SHA-256", baseDir);
275    }
276
277    @Test
278    public void testPlainOcflObjectAlreadyExistsInOcfl() throws Exception {
279        final var pid = "example:1";
280        final var ocflRepo =  new OcflRepositoryBuilder()
281                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
282                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
283                .workDir(tmpDir)
284                .build();
285        ocflRepo.updateObject(ObjectVersionId.head(pid), new VersionInfo(), updater -> {
286            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
287        });
288        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
289                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
290                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
291                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
292                "--id-prefix", ""};
293        final PicocliMigrator migrator = new PicocliMigrator();
294        final CommandLine cmd = new CommandLine(migrator);
295        final int result = cmd.execute(args);
296        assertEquals(1, result); //should fail because object already exists
297    }
298
299    @Test
300    public void testFedoraOcflObjectAlreadyExistsInOcfl() throws Exception {
301        final var ocflObjectId = "info:fedora/example:1";
302        final var ocflRepo =  new OcflRepositoryBuilder()
303                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
304                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
305                .workDir(tmpDir)
306                .build();
307        ocflRepo.updateObject(ObjectVersionId.head(ocflObjectId), new VersionInfo(), updater -> {
308            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
309        });
310        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
311                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
312                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
313                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
314        final PicocliMigrator migrator = new PicocliMigrator();
315        final CommandLine cmd = new CommandLine(migrator);
316        final int result = cmd.execute(args);
317        assertEquals(1, result); //should fail because object already exists
318    }
319
320    @Test
321    public void testInvalidChecksumErrorsPlain() throws Exception {
322        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
323                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
324                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
325                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"};
326        final PicocliMigrator migrator = new PicocliMigrator();
327        final CommandLine cmd = new CommandLine(migrator);
328        final int result = cmd.execute(args);
329        assertEquals(1, result); //should fail because of invalid checksum
330    }
331
332    @Test
333    public void testInvalidChecksumErrorsFedora() throws Exception {
334        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
335                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
336                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
337                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"};
338        final PicocliMigrator migrator = new PicocliMigrator();
339        final CommandLine cmd = new CommandLine(migrator);
340        final int result = cmd.execute(args);
341        assertEquals(1, result); //should fail because of invalid checksum
342    }
343
344    @Test
345    public void testInvalidChecksumCanBeAllowedPlain() throws Exception {
346        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
347                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
348                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
349                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01",
350                "--no-checksum-validation"};
351        final PicocliMigrator migrator = new PicocliMigrator();
352        final CommandLine cmd = new CommandLine(migrator);
353        final int result = cmd.execute(args);
354        assertEquals(0, result); //should succeed because checksum validation is disabled
355    }
356
357    @Test
358    public void testInvalidChecksumCanBeAllowedFedora() throws Exception {
359        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
360                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
361                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
362                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01",
363                "--no-checksum-validation"};
364        final PicocliMigrator migrator = new PicocliMigrator();
365        final CommandLine cmd = new CommandLine(migrator);
366        final int result = cmd.execute(args);
367        assertEquals(0, result); //should succeed because checksum validation is disabled
368    }
369
370    @Test
371    public void handleOutOfOrderDatastreamVersions() throws Exception {
372        final var ocflObjectId = "info:fedora/example:1";
373        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
374                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
375                "--datastreams-dir","src/test/resources/legacyFS-out-of-order/datastreams/2015/0430/16/01",
376                "--objects-dir", "src/test/resources/legacyFS-out-of-order/objects/2015/0430/16/01"};
377        final PicocliMigrator migrator = new PicocliMigrator();
378        final CommandLine cmd = new CommandLine(migrator);
379
380        final int result = cmd.execute(args);
381        assertEquals(0, result);
382
383        final var ocflRepo = createOcflRepo();
384
385        final var obj = ocflRepo.getObject(ObjectVersionId.head(ocflObjectId));
386        try (final var stream = obj.getFile("DS1").getStream()) {
387            assertEquals("\n<test>\n  This is a test that was edited.\n</test>\n",
388                    IOUtils.toString(stream, StandardCharsets.UTF_8));
389        }
390    }
391
392    /**
393     * Validate the manifest digests in an inventory file.
394     * @param inventory the inventory file
395     * @param digestAlgo the digest algorithm
396     * @param baseDir the path of the OCFL object
397     * @throws IOException issues opening the inventory file.
398     * @throws NoSuchAlgorithmException issues creating a MessageDigest.
399     */
400    private void validateManifests(final File inventory, final String digestAlgo, final Path baseDir)
401            throws IOException, NoSuchAlgorithmException {
402        final var manifests = getManifests(inventory);
403        final MessageDigest md = MessageDigest.getInstance(digestAlgo);
404        for (final var entry : manifests.entrySet()) {
405            final File f = baseDir.resolve(entry.getKey()).toFile();
406            assertTrue(f.exists());
407            final String digest = new String(Hex.encodeHex(DigestUtils.digest(md, new FileInputStream(f))));
408            assertEquals(entry.getValue(), digest);
409        }
410    }
411
412    /**
413     * Parse the manifest section out of an OCFL inventory file and return a map of filename -> hash
414     * @param inventory the OCFL inventory file
415     * @return map of file paths from the OCFL object root and their digests
416     * @throws IOException issues opening the inventory file.
417     */
418    private Map<String, String> getManifests(final File inventory) throws IOException {
419        final ObjectMapper mapper = new ObjectMapper();
420        final JsonNode rootNode = mapper.readTree(inventory);
421        final JsonNode manifestNode = rootNode.findValues("manifest").get(0);
422        final Map<String, String> fileManifest = new HashMap<>();
423        final var fieldIter = manifestNode.fields();
424        while (fieldIter.hasNext()) {
425            final var entry = fieldIter.next();
426            final String hash = entry.getKey();
427            if (entry.getValue().isArray()) {
428                // More than one file with the same hash
429                entry.getValue().spliterator().forEachRemaining(file -> fileManifest.put(file.asText(), hash));
430            } else {
431                fileManifest.put(entry.getValue().asText(), hash);
432            }
433        }
434        return fileManifest;
435    }
436
437    private OcflRepository createOcflRepo() {
438        return new OcflRepositoryBuilder()
439                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
440                .storage(OcflStorageBuilder.builder().fileSystem(targetDir.resolve("data/ocfl-root")).build())
441                .workDir(workingDir)
442                .build();
443    }
444
445}