001package org.fcrepo.migration;
002
003import static org.junit.Assert.assertEquals;
004import static org.junit.Assert.assertTrue;
005
006import java.io.ByteArrayInputStream;
007import java.io.File;
008import java.io.FileInputStream;
009import java.io.IOException;
010import java.nio.charset.StandardCharsets;
011import java.nio.file.Files;
012import java.nio.file.Path;
013import java.security.MessageDigest;
014import java.security.NoSuchAlgorithmException;
015import java.util.ArrayList;
016import java.util.HashMap;
017import java.util.Map;
018
019import edu.wisc.library.ocfl.api.model.FileDetails;
020import edu.wisc.library.ocfl.api.model.ObjectVersionId;
021import edu.wisc.library.ocfl.api.model.VersionDetails;
022import edu.wisc.library.ocfl.api.model.VersionInfo;
023import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
024import org.apache.commons.codec.binary.Hex;
025import org.apache.commons.codec.digest.DigestUtils;
026import org.apache.commons.io.FileUtils;
027import org.junit.After;
028import org.junit.Before;
029import org.junit.Test;
030
031import com.fasterxml.jackson.databind.JsonNode;
032import com.fasterxml.jackson.databind.ObjectMapper;
033
034import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
035import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleIdEncapsulationLayoutConfig;
036import edu.wisc.library.ocfl.core.storage.filesystem.FileSystemOcflStorage;
037import picocli.CommandLine;
038
039/**
040 * @author bcail
041 */
042public class PicocliIT {
043
044    private Path tmpDir;
045
046    @Before
047    public void setup() throws IOException {
048        tmpDir = Files.createTempDirectory("migration-utils");
049    }
050
051    @After
052    public void tearDown() throws IOException {
053        try {
054            FileUtils.forceDelete(tmpDir.toFile());
055        } catch (final IOException io) {
056            System.err.println("Error cleaning up " + tmpDir.toString());
057            io.printStackTrace();
058        }
059    }
060
061    @Test
062    public void testPlainOcfl() throws Exception {
063        final Path targetDir = tmpDir.resolve("target");
064        final Path workingDir = tmpDir.resolve("working");
065        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
066                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
067                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
068                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
069        final PicocliMigrator migrator = new PicocliMigrator();
070        final CommandLine cmd = new CommandLine(migrator);
071
072        cmd.execute(args);
073        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
074        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
075                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
076        final File inventory = baseDir.resolve("inventory.json").toFile();
077        assertTrue(inventory.exists());
078        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
079        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
080    }
081
082    @Test
083    public void testPlainOcflEmptyIdPrefix() throws Exception {
084        final Path targetDir = tmpDir.resolve("target");
085        final Path workingDir = tmpDir.resolve("working");
086        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
087                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
088                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
089                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
090                "--id-prefix", ""};
091        final PicocliMigrator migrator = new PicocliMigrator();
092        final CommandLine cmd = new CommandLine(migrator);
093
094        final int result = cmd.execute(args);
095        assertEquals(0, result);
096        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
097        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
098                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
099        final File inventory = baseDir.resolve("inventory.json").toFile();
100        assertTrue(inventory.exists());
101        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
102        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
103    }
104
105    @Test
106    public void testFedoraOcflCantChangeIdPrefix() throws Exception {
107        final Path targetDir = tmpDir.resolve("target");
108        final Path workingDir = tmpDir.resolve("working");
109        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
110                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
111                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
112                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
113                "--id-prefix", ""};
114        final PicocliMigrator migrator = new PicocliMigrator();
115        final CommandLine cmd = new CommandLine(migrator);
116
117        final int result = cmd.execute(args);
118        assertEquals(1, result);
119    }
120
121    @Test
122    public void testPlainOcflNoWorkingDirOption() throws Exception {
123        final Path targetDir = tmpDir.resolve("target");
124        final String[] args = {"--target-dir", targetDir.toString(),
125                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
126                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
127                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
128        final PicocliMigrator migrator = new PicocliMigrator();
129        final CommandLine cmd = new CommandLine(migrator);
130
131        cmd.execute(args);
132        final Path workingDir = Path.of(System.getProperty("user.dir"));
133        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
134        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
135        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
136    }
137
138    @Test
139    public void testFedoraOcfl() throws Exception {
140        final Path targetDir = tmpDir.resolve("target");
141        final Path workingDir = tmpDir.resolve("working");
142        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
143                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
144                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
145                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
146        final PicocliMigrator migrator = new PicocliMigrator();
147        final CommandLine cmd = new CommandLine(migrator);
148
149        cmd.execute(args);
150        assertTrue(Files.list(targetDir.resolve("data").resolve("ocfl-root"))
151                .anyMatch(element -> element.endsWith("0=ocfl_1.0")));
152        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index")));
153        assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid")));
154    }
155
156    @Test
157    public void testExistingRepoDifferentStorageLayout() throws Exception {
158        //create repo with different storage layout
159        final Path targetDir = tmpDir.resolve("target");
160        final var ocflRepo =  new OcflRepositoryBuilder()
161                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
162                .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build())
163                .workDir(tmpDir)
164                .build();
165        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
166
167        //migrate object into it
168        final Path workingDir = tmpDir.resolve("working");
169        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
170                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
171                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
172                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
173                "--id-prefix", ""};
174        final PicocliMigrator migrator = new PicocliMigrator();
175        final CommandLine cmd = new CommandLine(migrator);
176        cmd.execute(args);
177
178        //verify that the correct storage layout was used - encapsulation directory is the encoded object id
179        assertTrue(Files.list(targetDir.resolve("750").resolve("677").resolve("e9b"))
180                .anyMatch(f -> f.endsWith("example%3a1")));
181    }
182
183    @Test
184    public void testMigrateFoxmlFileInsteadOfPropertyFiles() throws Exception {
185        final Path targetDir = tmpDir.resolve("target");
186        final Path workingDir = tmpDir.resolve("working");
187        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
188                "--source-type", "LEGACY", "--migration-type", "PLAIN_OCFL",
189                "--datastreams-dir", "src/test/resources/legacyFS/datastreams/2015/0430/16/01",
190                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
191                "--id-prefix", "", "--foxml-file"};
192        final PicocliMigrator migrator = new PicocliMigrator();
193        final CommandLine cmd = new CommandLine(migrator);
194        cmd.execute(args);
195
196        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
197                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
198        final File inventory = baseDir.resolve("inventory.json").toFile();
199        assertTrue(inventory.exists());
200        final var ocflRepo =  new OcflRepositoryBuilder()
201                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
202                .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build())
203                .workDir(workingDir)
204                .build();
205        final var object = ocflRepo.getObject(ObjectVersionId.head("example:1"));
206        final ArrayList<String> files = new ArrayList<String>();
207        for (final var file: object.getFiles()) {
208            files.add(file.getPath());
209        }
210        final var expectedFiles = new ArrayList<String>();
211        expectedFiles.add("AUDIT");
212        expectedFiles.add("DS2");
213        expectedFiles.add("DS1");
214        expectedFiles.add("DS4");
215        expectedFiles.add("DS3");
216        expectedFiles.add("DC");
217        assertEquals(expectedFiles, files);
218        //now check for a FOXML, which should show up in a previous version
219        final var versions = ocflRepo.describeObject("example:1").getVersionMap().values();
220        boolean foundFoxml = false;
221        for (VersionDetails v : versions) {
222            for (FileDetails f : v.getFiles()) {
223                if (f.getPath().equals("FOXML")) {
224                    foundFoxml = true;
225                    break;
226                }
227            }
228        }
229        assertTrue(foundFoxml);
230    }
231
232    @Test
233    public void testInvalidDigestAlgorithm() throws Exception {
234        final Path targetDir = tmpDir.resolve("target");
235        final Path workingDir = tmpDir.resolve("working");
236        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
237                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
238                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
239                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
240                "--algorithm", "sha384"};
241        final PicocliMigrator migrator = new PicocliMigrator();
242        final CommandLine cmd = new CommandLine(migrator);
243
244        final int result = cmd.execute(args);
245        assertEquals(1, result);
246    }
247
248    /**
249     * MD5 is a supported algorithm under an OCFL extension, but we don't support it.
250     */
251    @Test
252    public void testInvalidForUsDigestAlgorithm() {
253        final Path targetDir = tmpDir.resolve("target");
254        final Path workingDir = tmpDir.resolve("working");
255        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
256                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
257                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
258                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
259                "--algorithm", "md5"};
260        final PicocliMigrator migrator = new PicocliMigrator();
261        final CommandLine cmd = new CommandLine(migrator);
262
263        final int result = cmd.execute(args);
264        assertEquals(1, result);
265    }
266
267    @Test
268    public void testSha256DigestAlgorithm() throws Exception {
269        final Path targetDir = tmpDir.resolve("target");
270        final Path workingDir = tmpDir.resolve("working");
271        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
272                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
273                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
274                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
275                "--algorithm", "sha256"};
276        final PicocliMigrator migrator = new PicocliMigrator();
277        final CommandLine cmd = new CommandLine(migrator);
278
279        final int result = cmd.execute(args);
280        assertEquals(0, result);
281        assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0")));
282        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
283                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
284        final File inventory = baseDir.resolve("inventory.json").toFile();
285        assertTrue(inventory.exists());
286        validateManifests(inventory, "SHA-256", baseDir);
287    }
288
289    @Test
290    public void testPlainOcflObjectAlreadyExistsInOcfl() throws Exception {
291        final Path targetDir = tmpDir.resolve("target");
292        final Path workingDir = tmpDir.resolve("working");
293        final var pid = "example:1";
294        final var ocflRepo =  new OcflRepositoryBuilder()
295                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
296                .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build())
297                .workDir(tmpDir)
298                .build();
299        ocflRepo.updateObject(ObjectVersionId.head(pid), new VersionInfo(), updater -> {
300            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
301        });
302        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
303                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
304                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
305                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
306                "--id-prefix", ""};
307        final PicocliMigrator migrator = new PicocliMigrator();
308        final CommandLine cmd = new CommandLine(migrator);
309        final int result = cmd.execute(args);
310        assertEquals(1, result); //should fail because object already exists
311    }
312
313    @Test
314    public void testFedoraOcflObjectAlreadyExistsInOcfl() throws Exception {
315        final Path targetDir = tmpDir.resolve("target");
316        final Path workingDir = tmpDir.resolve("working");
317        final var ocflObjectId = "info:fedora/example:1";
318        final var ocflRepo =  new OcflRepositoryBuilder()
319                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
320                .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build())
321                .workDir(tmpDir)
322                .build();
323        ocflRepo.updateObject(ObjectVersionId.head(ocflObjectId), new VersionInfo(), updater -> {
324            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
325        });
326        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
327                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
328                "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
329                "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
330        final PicocliMigrator migrator = new PicocliMigrator();
331        final CommandLine cmd = new CommandLine(migrator);
332        final int result = cmd.execute(args);
333        assertEquals(1, result); //should fail because object already exists
334    }
335
336    @Test
337    public void testInvalidChecksumErrors() throws Exception {
338        final Path targetDir = tmpDir.resolve("target");
339        final Path workingDir = tmpDir.resolve("working");
340        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
341                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
342                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
343                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"};
344        final PicocliMigrator migrator = new PicocliMigrator();
345        final CommandLine cmd = new CommandLine(migrator);
346        final int result = cmd.execute(args);
347        assertEquals(1, result); //should fail because of invalid checksum
348    }
349
350    @Test
351    public void testInvalidChecksumCanBeAllowed() throws Exception {
352        final Path targetDir = tmpDir.resolve("target");
353        final Path workingDir = tmpDir.resolve("working");
354        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
355                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
356                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
357                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01",
358                "--no-checksum-validation"};
359        final PicocliMigrator migrator = new PicocliMigrator();
360        final CommandLine cmd = new CommandLine(migrator);
361        final int result = cmd.execute(args);
362        assertEquals(0, result); //should succeed because checksum validation is disabled
363    }
364
365    /**
366     * Validate the manifest digests in an inventory file.
367     * @param inventory the inventory file
368     * @param digestAlgo the digest algorithm
369     * @param baseDir the path of the OCFL object
370     * @throws IOException issues opening the inventory file.
371     * @throws NoSuchAlgorithmException issues creating a MessageDigest.
372     */
373    private void validateManifests(final File inventory, final String digestAlgo, final Path baseDir)
374            throws IOException, NoSuchAlgorithmException {
375        final var manifests = getManifests(inventory);
376        final MessageDigest md = MessageDigest.getInstance(digestAlgo);
377        for (final var entry : manifests.entrySet()) {
378            final File f = baseDir.resolve(entry.getKey()).toFile();
379            assertTrue(f.exists());
380            final String digest = new String(Hex.encodeHex(DigestUtils.digest(md, new FileInputStream(f))));
381            assertEquals(entry.getValue(), digest);
382        }
383    }
384
385    /**
386     * Parse the manifest section out of an OCFL inventory file and return a map of filename -> hash
387     * @param inventory the OCFL inventory file
388     * @return map of file paths from the OCFL object root and their digests
389     * @throws IOException issues opening the inventory file.
390     */
391    private Map<String, String> getManifests(final File inventory) throws IOException {
392        final ObjectMapper mapper = new ObjectMapper();
393        final JsonNode rootNode = mapper.readTree(inventory);
394        final JsonNode manifestNode = rootNode.findValues("manifest").get(0);
395        final Map<String, String> fileManifest = new HashMap<>();
396        final var fieldIter = manifestNode.fields();
397        while (fieldIter.hasNext()) {
398            final var entry = fieldIter.next();
399            final String hash = entry.getKey();
400            if (entry.getValue().isArray()) {
401                // More than one file with the same hash
402                entry.getValue().spliterator().forEachRemaining(file -> fileManifest.put(file.asText(), hash));
403            } else {
404                fileManifest.put(entry.getValue().asText(), hash);
405            }
406        }
407        return fileManifest;
408    }
409}