001package org.fcrepo.migration;
002
003import static org.junit.Assert.assertEquals;
004import static org.junit.Assert.assertTrue;
005
006import java.io.ByteArrayInputStream;
007import java.io.File;
008import java.io.FileInputStream;
009import java.io.IOException;
010import java.nio.charset.StandardCharsets;
011import java.nio.file.Files;
012import java.nio.file.Path;
013import java.security.MessageDigest;
014import java.security.NoSuchAlgorithmException;
015import java.util.ArrayList;
016import java.util.HashMap;
017import java.util.Map;
018
019import edu.wisc.library.ocfl.api.OcflRepository;
020import edu.wisc.library.ocfl.api.model.FileDetails;
021import edu.wisc.library.ocfl.api.model.ObjectVersionId;
022import edu.wisc.library.ocfl.api.model.VersionDetails;
023import edu.wisc.library.ocfl.api.model.VersionInfo;
024import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
025import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder;
026import org.apache.commons.codec.binary.Hex;
027import org.apache.commons.codec.digest.DigestUtils;
028import org.apache.commons.io.FileUtils;
029import org.apache.commons.io.IOUtils;
030import org.junit.After;
031import org.junit.Before;
032import org.junit.Test;
033
034import com.fasterxml.jackson.databind.JsonNode;
035import com.fasterxml.jackson.databind.ObjectMapper;
036
037import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
038import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleIdEncapsulationLayoutConfig;
039import picocli.CommandLine;
040
041/**
042 * @author bcail
043 */
044public class PicocliIT {
045
046    private Path tmpDir;
047    private Path targetDir;
048    private Path workingDir;
049
050    @Before
051    public void setup() throws IOException {
052        tmpDir = Files.createTempDirectory("migration-utils");
053        targetDir = tmpDir.resolve("target");
054        workingDir = tmpDir.resolve("working");
055    }
056
057    @After
058    public void tearDown() throws IOException {
059        try {
060            FileUtils.forceDelete(tmpDir.toFile());
061        } catch (final IOException io) {
062            System.err.println("Error cleaning up " + tmpDir.toString());
063            io.printStackTrace();
064        }
065    }
066
067    private boolean checkDirForNamaste(final Path targetDir) throws IOException {
068        return Files.list(targetDir).map(Path::getFileName).map(Path::toString)
069                .anyMatch(e -> e.startsWith("0=ocfl_1."));
070    }
071
072    @Test
073    public void testPlainOcfl() throws Exception {
074        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
075                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
076                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
077                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
078        final PicocliMigrator migrator = new PicocliMigrator();
079        final CommandLine cmd = new CommandLine(migrator);
080
081        cmd.execute(args);
082        assertTrue(checkDirForNamaste(targetDir));
083        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
084                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
085        final File inventory = baseDir.resolve("inventory.json").toFile();
086        assertTrue(inventory.exists());
087        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
088        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
089    }
090
091    @Test
092    public void testPlainOcflEmptyIdPrefix() throws Exception {
093        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
094                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
095                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
096                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
097                "--id-prefix", ""};
098        final PicocliMigrator migrator = new PicocliMigrator();
099        final CommandLine cmd = new CommandLine(migrator);
100
101        final int result = cmd.execute(args);
102        assertEquals(0, result);
103        assertTrue(checkDirForNamaste(targetDir));
104        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
105                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
106        final File inventory = baseDir.resolve("inventory.json").toFile();
107        assertTrue(inventory.exists());
108        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
109        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
110    }
111
112    @Test
113    public void testFedoraOcflCantChangeIdPrefix() throws Exception {
114        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
115                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
116                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
117                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
118                "--id-prefix", ""};
119        final PicocliMigrator migrator = new PicocliMigrator();
120        final CommandLine cmd = new CommandLine(migrator);
121
122        final int result = cmd.execute(args);
123        assertEquals(1, result);
124    }
125
126    @Test
127    public void testPlainOcflNoWorkingDirOption() throws Exception {
128        final String[] args = {"--target-dir", targetDir.toString(),
129                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
130                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
131                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
132        final PicocliMigrator migrator = new PicocliMigrator();
133        final CommandLine cmd = new CommandLine(migrator);
134
135        cmd.execute(args);
136        final Path workingDir = Path.of(System.getProperty("user.dir"));
137        assertTrue(checkDirForNamaste(targetDir));
138        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
139        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
140    }
141
142    @Test
143    public void testFedoraOcfl() throws Exception {
144        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
145                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
146                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
147                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
148        final PicocliMigrator migrator = new PicocliMigrator();
149        final CommandLine cmd = new CommandLine(migrator);
150
151        cmd.execute(args);
152        assertTrue(checkDirForNamaste(targetDir.resolve("data").resolve("ocfl-root")));
153        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
154        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
155    }
156
157    @Test
158    public void testExistingRepoDifferentStorageLayout() throws Exception {
159        //create repo with different storage layout
160        final var ocflRepo =  new OcflRepositoryBuilder()
161                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
162                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
163                .workDir(tmpDir)
164                .build();
165        assertTrue(checkDirForNamaste(targetDir));
166
167        //migrate object into it
168        final Path workingDir = tmpDir.resolve("working");
169        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
170                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
171                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
172                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
173                "--id-prefix", ""};
174        final PicocliMigrator migrator = new PicocliMigrator();
175        final CommandLine cmd = new CommandLine(migrator);
176        cmd.execute(args);
177
178        //verify that the correct storage layout was used - encapsulation directory is the encoded object id
179        assertTrue(Files.list(targetDir.resolve("750").resolve("677").resolve("e9b"))
180                .anyMatch(f -> f.endsWith("example%3a1")));
181    }
182
183    @Test
184    public void testMigrateFoxmlFileInsteadOfPropertyFiles() throws Exception {
185        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
186                "--source-type", "LEGACY", "--migration-type", "PLAIN_OCFL",
187                "--datastreams-dir", "src/test/resources/legacyFS/datastreams/2015/0430/16/01",
188                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
189                "--id-prefix", "", "--foxml-file"};
190        final PicocliMigrator migrator = new PicocliMigrator();
191        final CommandLine cmd = new CommandLine(migrator);
192        cmd.execute(args);
193
194        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
195                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
196        final File inventory = baseDir.resolve("inventory.json").toFile();
197        assertTrue(inventory.exists());
198        final var ocflRepo =  new OcflRepositoryBuilder()
199                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
200                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
201                .workDir(workingDir)
202                .build();
203        final var object = ocflRepo.getObject(ObjectVersionId.head("example:1"));
204        final ArrayList<String> files = new ArrayList<String>();
205        for (final var file: object.getFiles()) {
206            files.add(file.getPath());
207        }
208        final var expectedFiles = new ArrayList<String>();
209        expectedFiles.add("AUDIT");
210        expectedFiles.add("DS2");
211        expectedFiles.add("DS1");
212        expectedFiles.add("DS4");
213        expectedFiles.add("DS3");
214        expectedFiles.add("DC");
215        assertEquals(expectedFiles, files);
216        //now check for a FOXML, which should show up in a previous version
217        final var versions = ocflRepo.describeObject("example:1").getVersionMap().values();
218        boolean foundFoxml = false;
219        for (final VersionDetails v : versions) {
220            for (final FileDetails f : v.getFiles()) {
221                if (f.getPath().equals("FOXML")) {
222                    foundFoxml = true;
223                    break;
224                }
225            }
226        }
227        assertTrue(foundFoxml);
228    }
229
230    @Test
231    public void testInvalidDigestAlgorithm() throws Exception {
232        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
233                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
234                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
235                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
236                "--algorithm", "sha384"};
237        final PicocliMigrator migrator = new PicocliMigrator();
238        final CommandLine cmd = new CommandLine(migrator);
239
240        final int result = cmd.execute(args);
241        assertEquals(1, result);
242    }
243
244    /**
245     * MD5 is a supported algorithm under an OCFL extension, but we don't support it.
246     */
247    @Test
248    public void testInvalidForUsDigestAlgorithm() {
249        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
250                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
251                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
252                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
253                "--algorithm", "md5"};
254        final PicocliMigrator migrator = new PicocliMigrator();
255        final CommandLine cmd = new CommandLine(migrator);
256
257        final int result = cmd.execute(args);
258        assertEquals(1, result);
259    }
260
261    @Test
262    public void testSha256DigestAlgorithm() throws Exception {
263        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
264                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
265                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
266                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
267                "--algorithm", "sha256"};
268        final PicocliMigrator migrator = new PicocliMigrator();
269        final CommandLine cmd = new CommandLine(migrator);
270
271        final int result = cmd.execute(args);
272        assertEquals(0, result);
273        assertTrue(checkDirForNamaste(targetDir));
274        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
275                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
276        final File inventory = baseDir.resolve("inventory.json").toFile();
277        assertTrue(inventory.exists());
278        validateManifests(inventory, "SHA-256", baseDir);
279    }
280
281    @Test
282    public void testPlainOcflObjectAlreadyExistsInOcfl() throws Exception {
283        final var pid = "example:1";
284        final var ocflRepo =  new OcflRepositoryBuilder()
285                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
286                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
287                .workDir(tmpDir)
288                .build();
289        ocflRepo.updateObject(ObjectVersionId.head(pid), new VersionInfo(), updater -> {
290            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
291        });
292        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
293                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
294                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
295                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
296                "--id-prefix", ""};
297        final PicocliMigrator migrator = new PicocliMigrator();
298        final CommandLine cmd = new CommandLine(migrator);
299        final int result = cmd.execute(args);
300        assertEquals(1, result); //should fail because object already exists
301    }
302
303    @Test
304    public void testFedoraOcflObjectAlreadyExistsInOcfl() throws Exception {
305        final var ocflObjectId = "info:fedora/example:1";
306        final var ocflRepo =  new OcflRepositoryBuilder()
307                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
308                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
309                .workDir(tmpDir)
310                .build();
311        ocflRepo.updateObject(ObjectVersionId.head(ocflObjectId), new VersionInfo(), updater -> {
312            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
313        });
314        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
315                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
316                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
317                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
318        final PicocliMigrator migrator = new PicocliMigrator();
319        final CommandLine cmd = new CommandLine(migrator);
320        final int result = cmd.execute(args);
321        assertEquals(1, result); //should fail because object already exists
322    }
323
324    @Test
325    public void testInvalidChecksumErrorsPlain() throws Exception {
326        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
327                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
328                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
329                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"};
330        final PicocliMigrator migrator = new PicocliMigrator();
331        final CommandLine cmd = new CommandLine(migrator);
332        final int result = cmd.execute(args);
333        assertEquals(1, result); //should fail because of invalid checksum
334    }
335
336    @Test
337    public void testInvalidChecksumErrorsFedora() throws Exception {
338        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
339                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
340                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
341                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"};
342        final PicocliMigrator migrator = new PicocliMigrator();
343        final CommandLine cmd = new CommandLine(migrator);
344        final int result = cmd.execute(args);
345        assertEquals(1, result); //should fail because of invalid checksum
346    }
347
348    @Test
349    public void testInvalidChecksumCanBeAllowedPlain() throws Exception {
350        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
351                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
352                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
353                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01",
354                "--no-checksum-validation"};
355        final PicocliMigrator migrator = new PicocliMigrator();
356        final CommandLine cmd = new CommandLine(migrator);
357        final int result = cmd.execute(args);
358        assertEquals(0, result); //should succeed because checksum validation is disabled
359    }
360
361    @Test
362    public void testInvalidChecksumCanBeAllowedFedora() throws Exception {
363        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
364                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
365                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
366                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01",
367                "--no-checksum-validation"};
368        final PicocliMigrator migrator = new PicocliMigrator();
369        final CommandLine cmd = new CommandLine(migrator);
370        final int result = cmd.execute(args);
371        assertEquals(0, result); //should succeed because checksum validation is disabled
372    }
373
374    @Test
375    public void handleOutOfOrderDatastreamVersions() throws Exception {
376        final var ocflObjectId = "info:fedora/example:1";
377        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
378                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
379                "--datastreams-dir","src/test/resources/legacyFS-out-of-order/datastreams/2015/0430/16/01",
380                "--objects-dir", "src/test/resources/legacyFS-out-of-order/objects/2015/0430/16/01"};
381        final PicocliMigrator migrator = new PicocliMigrator();
382        final CommandLine cmd = new CommandLine(migrator);
383
384        final int result = cmd.execute(args);
385        assertEquals(0, result);
386
387        final var ocflRepo = createOcflRepo();
388
389        final var obj = ocflRepo.getObject(ObjectVersionId.head(ocflObjectId));
390        try (final var stream = obj.getFile("DS1").getStream()) {
391            assertEquals("\n<test>\n  This is a test that was edited.\n</test>\n",
392                    IOUtils.toString(stream, StandardCharsets.UTF_8));
393        }
394    }
395
396    /**
397     * Validate the manifest digests in an inventory file.
398     * @param inventory the inventory file
399     * @param digestAlgo the digest algorithm
400     * @param baseDir the path of the OCFL object
401     * @throws IOException issues opening the inventory file.
402     * @throws NoSuchAlgorithmException issues creating a MessageDigest.
403     */
404    private void validateManifests(final File inventory, final String digestAlgo, final Path baseDir)
405            throws IOException, NoSuchAlgorithmException {
406        final var manifests = getManifests(inventory);
407        final MessageDigest md = MessageDigest.getInstance(digestAlgo);
408        for (final var entry : manifests.entrySet()) {
409            final File f = baseDir.resolve(entry.getKey()).toFile();
410            assertTrue(f.exists());
411            final String digest = new String(Hex.encodeHex(DigestUtils.digest(md, new FileInputStream(f))));
412            assertEquals(entry.getValue(), digest);
413        }
414    }
415
416    /**
417     * Parse the manifest section out of an OCFL inventory file and return a map of filename -> hash
418     * @param inventory the OCFL inventory file
419     * @return map of file paths from the OCFL object root and their digests
420     * @throws IOException issues opening the inventory file.
421     */
422    private Map<String, String> getManifests(final File inventory) throws IOException {
423        final ObjectMapper mapper = new ObjectMapper();
424        final JsonNode rootNode = mapper.readTree(inventory);
425        final JsonNode manifestNode = rootNode.findValues("manifest").get(0);
426        final Map<String, String> fileManifest = new HashMap<>();
427        final var fieldIter = manifestNode.fields();
428        while (fieldIter.hasNext()) {
429            final var entry = fieldIter.next();
430            final String hash = entry.getKey();
431            if (entry.getValue().isArray()) {
432                // More than one file with the same hash
433                entry.getValue().spliterator().forEachRemaining(file -> fileManifest.put(file.asText(), hash));
434            } else {
435                fileManifest.put(entry.getValue().asText(), hash);
436            }
437        }
438        return fileManifest;
439    }
440
441    private OcflRepository createOcflRepo() {
442        return new OcflRepositoryBuilder()
443                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
444                .storage(OcflStorageBuilder.builder().fileSystem(targetDir.resolve("data/ocfl-root")).build())
445                .workDir(workingDir)
446                .build();
447    }
448
449}