package org.fcrepo.migration;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import edu.wisc.library.ocfl.api.OcflRepository;
import edu.wisc.library.ocfl.api.model.FileDetails;
import edu.wisc.library.ocfl.api.model.ObjectVersionId;
import edu.wisc.library.ocfl.api.model.VersionDetails;
import edu.wisc.library.ocfl.api.model.VersionInfo;
import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleIdEncapsulationLayoutConfig;
import picocli.CommandLine;

/**
 * Integration tests that drive {@link PicocliMigrator} through a picocli {@link CommandLine}
 * with various argument combinations and then inspect the exit code and/or the files written
 * to the target and working directories.
 *
 * @author bcail
 */
public class PicocliIT {

    private Path tmpDir;
    private Path targetDir;
    private Path workingDir;

    /**
     * Create a fresh temporary directory for each test and derive the (not yet created)
     * target and working directory paths from it.
     *
     * @throws IOException if the temporary directory cannot be created
     */
    @Before
    public void setup() throws IOException {
        tmpDir = Files.createTempDirectory("migration-utils");
        targetDir = tmpDir.resolve("target");
        workingDir = tmpDir.resolve("working");
    }

    /**
     * Best-effort removal of the temporary directory; a failure is reported on stderr
     * but deliberately does not fail the test.
     *
     * @throws IOException declared for signature compatibility; cleanup errors are caught
     */
    @After
    public void tearDown() throws IOException {
        try {
            FileUtils.forceDelete(tmpDir.toFile());
        } catch (final IOException io) {
            System.err.println("Error cleaning up " + tmpDir.toString());
            io.printStackTrace();
        }
    }

    @Test
    public void testPlainOcfl() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        cmd.execute(args);
        assertTrue(hasEntry(targetDir, "0=ocfl_1.0"));
        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
        final File inventory = baseDir.resolve("inventory.json").toFile();
        assertTrue(inventory.exists());
        assertTrue(hasEntry(workingDir, "index"));
        assertTrue(hasEntry(workingDir, "pid"));
    }

    @Test
    public void testPlainOcflEmptyIdPrefix() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--id-prefix", ""};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        final int result = cmd.execute(args);
        assertEquals(0, result);
        assertTrue(hasEntry(targetDir, "0=ocfl_1.0"));
        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
        final File inventory = baseDir.resolve("inventory.json").toFile();
        assertTrue(inventory.exists());
        assertTrue(hasEntry(workingDir, "index"));
        assertTrue(hasEntry(workingDir, "pid"));
    }

    @Test
    public void testFedoraOcflCantChangeIdPrefix() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--id-prefix", ""};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        final int result = cmd.execute(args);
        assertEquals(1, result);
    }

    @Test
    public void testPlainOcflNoWorkingDirOption() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        cmd.execute(args);
        // Without --working-dir the test expects the index and pid entries in the JVM's current directory.
        // NOTE(review): those entries are outside tmpDir, so tearDown() does not remove them - confirm
        // whether they should be cleaned up here.
        final Path defaultWorkingDir = Path.of(System.getProperty("user.dir"));
        assertTrue(hasEntry(targetDir, "0=ocfl_1.0"));
        assertTrue(hasEntry(defaultWorkingDir, "index"));
        assertTrue(hasEntry(defaultWorkingDir, "pid"));
    }

    @Test
    public void testFedoraOcfl() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        cmd.execute(args);
        assertTrue(hasEntry(targetDir.resolve("data").resolve("ocfl-root"), "0=ocfl_1.0"));
        assertTrue(hasEntry(workingDir, "index"));
        assertTrue(hasEntry(workingDir, "pid"));
    }

    @Test
    public void testExistingRepoDifferentStorageLayout() throws Exception {
        //create repo with different storage layout; the returned repository object itself is not
        //needed, only the on-disk storage root that the assertion below checks for
        new OcflRepositoryBuilder()
                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
                .workDir(tmpDir)
                .build();
        assertTrue(hasEntry(targetDir, "0=ocfl_1.0"));

        //migrate object into it
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--id-prefix", ""};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        cmd.execute(args);

        //verify that the correct storage layout was used - encapsulation directory is the encoded object id
        assertTrue(hasEntry(targetDir.resolve("750").resolve("677").resolve("e9b"), "example%3a1"));
    }

    @Test
    public void testMigrateFoxmlFileInsteadOfPropertyFiles() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY", "--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir", "src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--id-prefix", "", "--foxml-file"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        cmd.execute(args);

        final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b")
                .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8");
        final File inventory = baseDir.resolve("inventory.json").toFile();
        assertTrue(inventory.exists());
        final var ocflRepo = new OcflRepositoryBuilder()
                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
                .workDir(workingDir)
                .build();
        final var object = ocflRepo.getObject(ObjectVersionId.head("example:1"));
        final var files = new ArrayList<String>();
        for (final var file: object.getFiles()) {
            files.add(file.getPath());
        }
        // The comparison is order-sensitive: it pins the order in which getFiles() yields the paths.
        final var expectedFiles = new ArrayList<String>();
        expectedFiles.add("AUDIT");
        expectedFiles.add("DS2");
        expectedFiles.add("DS1");
        expectedFiles.add("DS4");
        expectedFiles.add("DS3");
        expectedFiles.add("DC");
        assertEquals(expectedFiles, files);
        //now check for a FOXML, which should show up in a previous version
        final var versions = ocflRepo.describeObject("example:1").getVersionMap().values();
        boolean foundFoxml = false;
        for (VersionDetails v : versions) {
            for (FileDetails f : v.getFiles()) {
                if (f.getPath().equals("FOXML")) {
                    foundFoxml = true;
                    break;
                }
            }
        }
        assertTrue(foundFoxml);
    }

    @Test
    public void testInvalidDigestAlgorithm() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--algorithm", "sha384"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        final int result = cmd.execute(args);
        assertEquals(1, result);
    }

    /**
     * MD5 is a supported algorithm under an OCFL extension, but we don't support it.
     */
    @Test
    public void testInvalidForUsDigestAlgorithm() {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--algorithm", "md5"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        final int result = cmd.execute(args);
        assertEquals(1, result);
    }

    @Test
    public void testSha256DigestAlgorithm() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--algorithm", "sha256"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        final int result = cmd.execute(args);
        assertEquals(0, result);
        assertTrue(hasEntry(targetDir, "0=ocfl_1.0"));
        final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69")
                .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6");
        final File inventory = baseDir.resolve("inventory.json").toFile();
        assertTrue(inventory.exists());
        validateManifests(inventory, "SHA-256", baseDir);
    }

    @Test
    public void testPlainOcflObjectAlreadyExistsInOcfl() throws Exception {
        final var pid = "example:1";
        final var ocflRepo = new OcflRepositoryBuilder()
                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
                .workDir(tmpDir)
                .build();
        ocflRepo.updateObject(ObjectVersionId.head(pid), new VersionInfo(), updater -> {
            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
        });
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01",
                               "--id-prefix", ""};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        final int result = cmd.execute(args);
        assertEquals(1, result); //should fail because object already exists
    }

    @Test
    public void testFedoraOcflObjectAlreadyExistsInOcfl() throws Exception {
        final var ocflObjectId = "info:fedora/example:1";
        final var ocflRepo = new OcflRepositoryBuilder()
                .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig())
                .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build())
                .workDir(tmpDir)
                .build();
        ocflRepo.updateObject(ObjectVersionId.head(ocflObjectId), new VersionInfo(), updater -> {
            updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1");
        });
        // NOTE(review): despite the test name, the migration type passed below is PLAIN_OCFL,
        // not FEDORA_OCFL - confirm this is intentional.
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                               "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                               "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01",
                               "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        final int result = cmd.execute(args);
        assertEquals(1, result); //should fail because object already exists
    }

    @Test
    public void testInvalidChecksumErrorsPlain() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        final int result = cmd.execute(args);
        assertEquals(1, result); //should fail because of invalid checksum
    }

    @Test
    public void testInvalidChecksumErrorsFedora() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        final int result = cmd.execute(args);
        assertEquals(1, result); //should fail because of invalid checksum
    }

    @Test
    public void testInvalidChecksumCanBeAllowedPlain() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL",
                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01",
                "--no-checksum-validation"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        final int result = cmd.execute(args);
        assertEquals(0, result); //should succeed because checksum validation is disabled
    }

    @Test
    public void testInvalidChecksumCanBeAllowedFedora() throws Exception {
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
                "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01",
                "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01",
                "--no-checksum-validation"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);
        final int result = cmd.execute(args);
        assertEquals(0, result); //should succeed because checksum validation is disabled
    }

    @Test
    public void handleOutOfOrderDatastreamVersions() throws Exception {
        final var ocflObjectId = "info:fedora/example:1";
        final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(),
                "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL",
                "--datastreams-dir","src/test/resources/legacyFS-out-of-order/datastreams/2015/0430/16/01",
                "--objects-dir", "src/test/resources/legacyFS-out-of-order/objects/2015/0430/16/01"};
        final PicocliMigrator migrator = new PicocliMigrator();
        final CommandLine cmd = new CommandLine(migrator);

        final int result = cmd.execute(args);
        assertEquals(0, result);

        final var ocflRepo = createOcflRepo();

        final var obj = ocflRepo.getObject(ObjectVersionId.head(ocflObjectId));
        try (final var stream = obj.getFile("DS1").getStream()) {
            assertEquals("\n<test>\n  This is a test that was edited.\n</test>\n",
                    IOUtils.toString(stream, StandardCharsets.UTF_8));
        }
    }

    /**
     * Check whether a directory has a direct child whose final path element is the given name.
     * The stream returned by {@link Files#list(Path)} is closed before returning so that the
     * underlying directory handle is released.
     *
     * @param dir the directory to list
     * @param name the file or directory name to look for
     * @return true if an entry named {@code name} exists directly under {@code dir}
     * @throws IOException if the directory cannot be listed
     */
    private static boolean hasEntry(final Path dir, final String name) throws IOException {
        try (final var entries = Files.list(dir)) {
            return entries.anyMatch(element -> element.endsWith(name));
        }
    }

    /**
     * Validate the manifest digests in an inventory file.
     * @param inventory the inventory file
     * @param digestAlgo the digest algorithm
     * @param baseDir the path of the OCFL object
     * @throws IOException issues opening the inventory file.
     * @throws NoSuchAlgorithmException issues creating a MessageDigest.
     */
    private void validateManifests(final File inventory, final String digestAlgo, final Path baseDir)
            throws IOException, NoSuchAlgorithmException {
        final var manifests = getManifests(inventory);
        final MessageDigest md = MessageDigest.getInstance(digestAlgo);
        for (final var entry : manifests.entrySet()) {
            final File f = baseDir.resolve(entry.getKey()).toFile();
            assertTrue(f.exists());
            final String digest;
            // Close each file after hashing instead of leaking one open stream per manifest entry.
            try (final var content = new FileInputStream(f)) {
                digest = new String(Hex.encodeHex(DigestUtils.digest(md, content)));
            }
            assertEquals(entry.getValue(), digest);
        }
    }

    /**
     * Parse the manifest section out of an OCFL inventory file and return a map of filename -> hash
     * @param inventory the OCFL inventory file
     * @return map of file paths from the OCFL object root and their digests
     * @throws IOException issues opening the inventory file.
     */
    private Map<String, String> getManifests(final File inventory) throws IOException {
        final ObjectMapper mapper = new ObjectMapper();
        final JsonNode rootNode = mapper.readTree(inventory);
        final JsonNode manifestNode = rootNode.findValues("manifest").get(0);
        final Map<String, String> fileManifest = new HashMap<>();
        final var fieldIter = manifestNode.fields();
        while (fieldIter.hasNext()) {
            final var entry = fieldIter.next();
            final String hash = entry.getKey();
            if (entry.getValue().isArray()) {
                // More than one file with the same hash
                entry.getValue().spliterator().forEachRemaining(file -> fileManifest.put(file.asText(), hash));
            } else {
                fileManifest.put(entry.getValue().asText(), hash);
            }
        }
        return fileManifest;
    }

    /**
     * Open the OCFL repository located under {@code data/ocfl-root} in the target directory,
     * using the hashed n-tuple layout and the test's working directory.
     *
     * @return the repository handle
     */
    private OcflRepository createOcflRepo() {
        return new OcflRepositoryBuilder()
                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
                .storage(OcflStorageBuilder.builder().fileSystem(targetDir.resolve("data/ocfl-root")).build())
                .workDir(workingDir)
                .build();
    }

}