001package org.fcrepo.migration; 002 003import static org.junit.Assert.assertEquals; 004import static org.junit.Assert.assertTrue; 005 006import java.io.ByteArrayInputStream; 007import java.io.File; 008import java.io.FileInputStream; 009import java.io.IOException; 010import java.nio.charset.StandardCharsets; 011import java.nio.file.Files; 012import java.nio.file.Path; 013import java.security.MessageDigest; 014import java.security.NoSuchAlgorithmException; 015import java.util.ArrayList; 016import java.util.HashMap; 017import java.util.Map; 018 019import edu.wisc.library.ocfl.api.OcflRepository; 020import edu.wisc.library.ocfl.api.model.FileDetails; 021import edu.wisc.library.ocfl.api.model.ObjectVersionId; 022import edu.wisc.library.ocfl.api.model.VersionDetails; 023import edu.wisc.library.ocfl.api.model.VersionInfo; 024import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig; 025import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder; 026import org.apache.commons.codec.binary.Hex; 027import org.apache.commons.codec.digest.DigestUtils; 028import org.apache.commons.io.FileUtils; 029import org.apache.commons.io.IOUtils; 030import org.junit.After; 031import org.junit.Before; 032import org.junit.Test; 033 034import com.fasterxml.jackson.databind.JsonNode; 035import com.fasterxml.jackson.databind.ObjectMapper; 036 037import edu.wisc.library.ocfl.core.OcflRepositoryBuilder; 038import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleIdEncapsulationLayoutConfig; 039import picocli.CommandLine; 040 041/** 042 * @author bcail 043 */ 044public class PicocliIT { 045 046 private Path tmpDir; 047 private Path targetDir; 048 private Path workingDir; 049 050 @Before 051 public void setup() throws IOException { 052 tmpDir = Files.createTempDirectory("migration-utils"); 053 targetDir = tmpDir.resolve("target"); 054 workingDir = tmpDir.resolve("working"); 055 } 056 057 @After 058 public void tearDown() throws IOException { 059 try { 060 FileUtils.forceDelete(tmpDir.toFile()); 061 } catch (final IOException io) { 062 System.err.println("Error cleaning up " + tmpDir.toString()); 063 io.printStackTrace(); 064 } 065 } 066 067 private boolean checkDirForNamaste(final Path targetDir) throws IOException { 068 return Files.list(targetDir).map(Path::getFileName).map(Path::toString) 069 .anyMatch(e -> e.startsWith("0=ocfl_1.")); 070 } 071 072 @Test 073 public void testPlainOcfl() throws Exception { 074 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 075 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 076 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 077 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 078 final PicocliMigrator migrator = new PicocliMigrator(); 079 final CommandLine cmd = new CommandLine(migrator); 080 081 cmd.execute(args); 082 assertTrue(checkDirForNamaste(targetDir)); 083 final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69") 084 .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6"); 085 final File inventory = baseDir.resolve("inventory.json").toFile(); 086 assertTrue(inventory.exists()); 087 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 088 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 089 } 090 091 @Test 092 public void testPlainOcflEmptyIdPrefix() throws Exception { 093 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 094 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 095 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 096 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 097 "--id-prefix", ""}; 098 final PicocliMigrator migrator = new PicocliMigrator(); 099 final CommandLine cmd = new CommandLine(migrator); 100 101 final int result = cmd.execute(args); 102 assertEquals(0, result); 103 assertTrue(checkDirForNamaste(targetDir)); 104 final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b") 105 .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8"); 106 final File inventory = baseDir.resolve("inventory.json").toFile(); 107 assertTrue(inventory.exists()); 108 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 109 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 110 } 111 112 @Test 113 public void testFedoraOcflCantChangeIdPrefix() throws Exception { 114 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 115 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 116 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 117 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 118 "--id-prefix", ""}; 119 final PicocliMigrator migrator = new PicocliMigrator(); 120 final CommandLine cmd = new CommandLine(migrator); 121 122 final int result = cmd.execute(args); 123 assertEquals(1, result); 124 } 125 126 @Test 127 public void testPlainOcflNoWorkingDirOption() throws Exception { 128 final String[] args = {"--target-dir", targetDir.toString(), 129 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 130 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 131 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 132 final PicocliMigrator migrator = new PicocliMigrator(); 133 final CommandLine cmd = new CommandLine(migrator); 134 135 cmd.execute(args); 136 final Path workingDir = Path.of(System.getProperty("user.dir")); 137 assertTrue(checkDirForNamaste(targetDir)); 138 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 139 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 140 } 141 142 @Test 143 public void testFedoraOcfl() throws Exception { 144 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 145 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 146 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 147 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 148 final PicocliMigrator migrator = new PicocliMigrator(); 149 final CommandLine cmd = new CommandLine(migrator); 150 151 cmd.execute(args); 152 assertTrue(checkDirForNamaste(targetDir.resolve("data").resolve("ocfl-root"))); 153 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 154 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 155 } 156 157 @Test 158 public void testExistingRepoDifferentStorageLayout() throws Exception { 159 //create repo with different storage layout 160 final var ocflRepo = new OcflRepositoryBuilder() 161 .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig()) 162 .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build()) 163 .workDir(tmpDir) 164 .build(); 165 assertTrue(checkDirForNamaste(targetDir)); 166 167 //migrate object into it 168 final Path workingDir = tmpDir.resolve("working"); 169 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 170 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 171 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 172 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 173 "--id-prefix", ""}; 174 final PicocliMigrator migrator = new PicocliMigrator(); 175 final CommandLine cmd = new CommandLine(migrator); 176 cmd.execute(args); 177 178 //verify that the correct storage layout was used - encapsulation directory is the encoded object id 179 assertTrue(Files.list(targetDir.resolve("750").resolve("677").resolve("e9b")) 180 .anyMatch(f -> f.endsWith("example%3a1"))); 181 } 182 183 @Test 184 public void testMigrateFoxmlFileInsteadOfPropertyFiles() throws Exception { 185 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 186 "--source-type", "LEGACY", "--migration-type", "PLAIN_OCFL", 187 "--datastreams-dir", "src/test/resources/legacyFS/datastreams/2015/0430/16/01", 188 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 189 "--id-prefix", "", "--foxml-file"}; 190 final PicocliMigrator migrator = new PicocliMigrator(); 191 final CommandLine cmd = new CommandLine(migrator); 192 cmd.execute(args); 193 194 final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b") 195 .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8"); 196 final File inventory = baseDir.resolve("inventory.json").toFile(); 197 assertTrue(inventory.exists()); 198 final var ocflRepo = new OcflRepositoryBuilder() 199 .defaultLayoutConfig(new HashedNTupleLayoutConfig()) 200 .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build()) 201 .workDir(workingDir) 202 .build(); 203 final var object = ocflRepo.getObject(ObjectVersionId.head("example:1")); 204 final ArrayList<String> files = new ArrayList<String>(); 205 for (final var file: object.getFiles()) { 206 files.add(file.getPath()); 207 } 208 final var expectedFiles = new ArrayList<String>(); 209 expectedFiles.add("AUDIT"); 210 expectedFiles.add("DS2"); 211 expectedFiles.add("DS1"); 212 expectedFiles.add("DS4"); 213 expectedFiles.add("DS3"); 214 expectedFiles.add("DC"); 215 assertEquals(expectedFiles, files); 216 //now check for a FOXML, which should show up in a previous version 217 final var versions = ocflRepo.describeObject("example:1").getVersionMap().values(); 218 boolean foundFoxml = false; 219 for (final VersionDetails v : versions) { 220 for (final FileDetails f : v.getFiles()) { 221 if (f.getPath().equals("FOXML")) { 222 foundFoxml = true; 223 break; 224 } 225 } 226 } 227 assertTrue(foundFoxml); 228 } 229 230 @Test 231 public void testInvalidDigestAlgorithm() throws Exception { 232 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 233 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 234 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 235 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 236 "--algorithm", "sha384"}; 237 final PicocliMigrator migrator = new PicocliMigrator(); 238 final CommandLine cmd = new CommandLine(migrator); 239 240 final int result = cmd.execute(args); 241 assertEquals(1, result); 242 } 243 244 /** 245 * MD5 is a supported algorithm under an OCFL extension, but we don't support it. 246 */ 247 @Test 248 public void testInvalidForUsDigestAlgorithm() { 249 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 250 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 251 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 252 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 253 "--algorithm", "md5"}; 254 final PicocliMigrator migrator = new PicocliMigrator(); 255 final CommandLine cmd = new CommandLine(migrator); 256 257 final int result = cmd.execute(args); 258 assertEquals(1, result); 259 } 260 261 @Test 262 public void testSha256DigestAlgorithm() throws Exception { 263 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 264 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 265 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 266 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 267 "--algorithm", "sha256"}; 268 final PicocliMigrator migrator = new PicocliMigrator(); 269 final CommandLine cmd = new CommandLine(migrator); 270 271 final int result = cmd.execute(args); 272 assertEquals(0, result); 273 assertTrue(checkDirForNamaste(targetDir)); 274 final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69") 275 .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6"); 276 final File inventory = baseDir.resolve("inventory.json").toFile(); 277 assertTrue(inventory.exists()); 278 validateManifests(inventory, "SHA-256", baseDir); 279 } 280 281 @Test 282 public void testPlainOcflObjectAlreadyExistsInOcfl() throws Exception { 283 final var pid = "example:1"; 284 final var ocflRepo = new OcflRepositoryBuilder() 285 .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig()) 286 .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build()) 287 .workDir(tmpDir) 288 .build(); 289 ocflRepo.updateObject(ObjectVersionId.head(pid), new VersionInfo(), updater -> { 290 updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1"); 291 }); 292 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 293 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 294 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 295 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 296 "--id-prefix", ""}; 297 final PicocliMigrator migrator = new PicocliMigrator(); 298 final CommandLine cmd = new CommandLine(migrator); 299 final int result = cmd.execute(args); 300 assertEquals(1, result); //should fail because object already exists 301 } 302 303 @Test 304 public void testFedoraOcflObjectAlreadyExistsInOcfl() throws Exception { 305 final var ocflObjectId = "info:fedora/example:1"; 306 final var ocflRepo = new OcflRepositoryBuilder() 307 .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig()) 308 .storage(OcflStorageBuilder.builder().fileSystem(targetDir).build()) 309 .workDir(tmpDir) 310 .build(); 311 ocflRepo.updateObject(ObjectVersionId.head(ocflObjectId), new VersionInfo(), updater -> { 312 updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1"); 313 }); 314 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 315 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 316 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 317 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 318 final PicocliMigrator migrator = new PicocliMigrator(); 319 final CommandLine cmd = new CommandLine(migrator); 320 final int result = cmd.execute(args); 321 assertEquals(1, result); //should fail because object already exists 322 } 323 324 @Test 325 public void testInvalidChecksumErrorsPlain() throws Exception { 326 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 327 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 328 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 329 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"}; 330 final PicocliMigrator migrator = new PicocliMigrator(); 331 final CommandLine cmd = new CommandLine(migrator); 332 final int result = cmd.execute(args); 333 assertEquals(1, result); //should fail because of invalid checksum 334 } 335 336 @Test 337 public void testInvalidChecksumErrorsFedora() throws Exception { 338 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 339 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 340 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 341 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"}; 342 final PicocliMigrator migrator = new PicocliMigrator(); 343 final CommandLine cmd = new CommandLine(migrator); 344 final int result = cmd.execute(args); 345 assertEquals(1, result); //should fail because of invalid checksum 346 } 347 348 @Test 349 public void testInvalidChecksumCanBeAllowedPlain() throws Exception { 350 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 351 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 352 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 353 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01", 354 "--no-checksum-validation"}; 355 final PicocliMigrator migrator = new PicocliMigrator(); 356 final CommandLine cmd = new CommandLine(migrator); 357 final int result = cmd.execute(args); 358 assertEquals(0, result); //should succeed because checksum validation is disabled 359 } 360 361 @Test 362 public void testInvalidChecksumCanBeAllowedFedora() throws Exception { 363 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 364 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 365 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 366 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01", 367 "--no-checksum-validation"}; 368 final PicocliMigrator migrator = new PicocliMigrator(); 369 final CommandLine cmd = new CommandLine(migrator); 370 final int result = cmd.execute(args); 371 assertEquals(0, result); //should succeed because checksum validation is disabled 372 } 373 374 @Test 375 public void handleOutOfOrderDatastreamVersions() throws Exception { 376 final var ocflObjectId = "info:fedora/example:1"; 377 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 378 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 379 "--datastreams-dir","src/test/resources/legacyFS-out-of-order/datastreams/2015/0430/16/01", 380 "--objects-dir", "src/test/resources/legacyFS-out-of-order/objects/2015/0430/16/01"}; 381 final PicocliMigrator migrator = new PicocliMigrator(); 382 final CommandLine cmd = new CommandLine(migrator); 383 384 final int result = cmd.execute(args); 385 assertEquals(0, result); 386 387 final var ocflRepo = createOcflRepo(); 388 389 final var obj = ocflRepo.getObject(ObjectVersionId.head(ocflObjectId)); 390 try (final var stream = obj.getFile("DS1").getStream()) { 391 assertEquals("\n<test>\n This is a test that was edited.\n</test>\n", 392 IOUtils.toString(stream, StandardCharsets.UTF_8)); 393 } 394 } 395 396 /** 397 * Validate the manifest digests in an inventory file. 398 * @param inventory the inventory file 399 * @param digestAlgo the digest algorithm 400 * @param baseDir the path of the OCFL object 401 * @throws IOException issues opening the inventory file. 402 * @throws NoSuchAlgorithmException issues creating a MessageDigest. 403 */ 404 private void validateManifests(final File inventory, final String digestAlgo, final Path baseDir) 405 throws IOException, NoSuchAlgorithmException { 406 final var manifests = getManifests(inventory); 407 final MessageDigest md = MessageDigest.getInstance(digestAlgo); 408 for (final var entry : manifests.entrySet()) { 409 final File f = baseDir.resolve(entry.getKey()).toFile(); 410 assertTrue(f.exists()); 411 final String digest = new String(Hex.encodeHex(DigestUtils.digest(md, new FileInputStream(f)))); 412 assertEquals(entry.getValue(), digest); 413 } 414 } 415 416 /** 417 * Parse the manifest section out of an OCFL inventory file and return a map of filename -> hash 418 * @param inventory the OCFL inventory file 419 * @return map of file paths from the OCFL object root and their digests 420 * @throws IOException issues opening the inventory file. 421 */ 422 private Map<String, String> getManifests(final File inventory) throws IOException { 423 final ObjectMapper mapper = new ObjectMapper(); 424 final JsonNode rootNode = mapper.readTree(inventory); 425 final JsonNode manifestNode = rootNode.findValues("manifest").get(0); 426 final Map<String, String> fileManifest = new HashMap<>(); 427 final var fieldIter = manifestNode.fields(); 428 while (fieldIter.hasNext()) { 429 final var entry = fieldIter.next(); 430 final String hash = entry.getKey(); 431 if (entry.getValue().isArray()) { 432 // More than one file with the same hash 433 entry.getValue().spliterator().forEachRemaining(file -> fileManifest.put(file.asText(), hash)); 434 } else { 435 fileManifest.put(entry.getValue().asText(), hash); 436 } 437 } 438 return fileManifest; 439 } 440 441 private OcflRepository createOcflRepo() { 442 return new OcflRepositoryBuilder() 443 .defaultLayoutConfig(new HashedNTupleLayoutConfig()) 444 .storage(OcflStorageBuilder.builder().fileSystem(targetDir.resolve("data/ocfl-root")).build()) 445 .workDir(workingDir) 446 .build(); 447 } 448 449}