001package org.fcrepo.migration; 002 003import static org.junit.Assert.assertEquals; 004import static org.junit.Assert.assertTrue; 005 006import java.io.ByteArrayInputStream; 007import java.io.File; 008import java.io.FileInputStream; 009import java.io.IOException; 010import java.nio.charset.StandardCharsets; 011import java.nio.file.Files; 012import java.nio.file.Path; 013import java.security.MessageDigest; 014import java.security.NoSuchAlgorithmException; 015import java.util.ArrayList; 016import java.util.HashMap; 017import java.util.Map; 018 019import edu.wisc.library.ocfl.api.model.FileDetails; 020import edu.wisc.library.ocfl.api.model.ObjectVersionId; 021import edu.wisc.library.ocfl.api.model.VersionDetails; 022import edu.wisc.library.ocfl.api.model.VersionInfo; 023import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig; 024import org.apache.commons.codec.binary.Hex; 025import org.apache.commons.codec.digest.DigestUtils; 026import org.apache.commons.io.FileUtils; 027import org.junit.After; 028import org.junit.Before; 029import org.junit.Test; 030 031import com.fasterxml.jackson.databind.JsonNode; 032import com.fasterxml.jackson.databind.ObjectMapper; 033 034import edu.wisc.library.ocfl.core.OcflRepositoryBuilder; 035import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleIdEncapsulationLayoutConfig; 036import edu.wisc.library.ocfl.core.storage.filesystem.FileSystemOcflStorage; 037import picocli.CommandLine; 038 039/** 040 * @author bcail 041 */ 042public class PicocliIT { 043 044 private Path tmpDir; 045 046 @Before 047 public void setup() throws IOException { 048 tmpDir = Files.createTempDirectory("migration-utils"); 049 } 050 051 @After 052 public void tearDown() throws IOException { 053 try { 054 FileUtils.forceDelete(tmpDir.toFile()); 055 } catch (final IOException io) { 056 System.err.println("Error cleaning up " + tmpDir.toString()); 057 io.printStackTrace(); 058 } 059 } 060 061 @Test 062 public void testPlainOcfl() throws Exception { 063 final Path targetDir = tmpDir.resolve("target"); 064 final Path workingDir = tmpDir.resolve("working"); 065 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 066 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 067 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 068 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 069 final PicocliMigrator migrator = new PicocliMigrator(); 070 final CommandLine cmd = new CommandLine(migrator); 071 072 cmd.execute(args); 073 assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0"))); 074 final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69") 075 .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6"); 076 final File inventory = baseDir.resolve("inventory.json").toFile(); 077 assertTrue(inventory.exists()); 078 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 079 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 080 } 081 082 @Test 083 public void testPlainOcflEmptyIdPrefix() throws Exception { 084 final Path targetDir = tmpDir.resolve("target"); 085 final Path workingDir = tmpDir.resolve("working"); 086 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 087 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 088 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 089 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 090 "--id-prefix", ""}; 091 final PicocliMigrator migrator = new PicocliMigrator(); 092 final CommandLine cmd = new CommandLine(migrator); 093 094 final int result = cmd.execute(args); 095 assertEquals(0, result); 096 assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0"))); 097 final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b") 098 .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8"); 099 final File inventory = baseDir.resolve("inventory.json").toFile(); 100 assertTrue(inventory.exists()); 101 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 102 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 103 } 104 105 @Test 106 public void testFedoraOcflCantChangeIdPrefix() throws Exception { 107 final Path targetDir = tmpDir.resolve("target"); 108 final Path workingDir = tmpDir.resolve("working"); 109 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 110 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 111 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 112 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 113 "--id-prefix", ""}; 114 final PicocliMigrator migrator = new PicocliMigrator(); 115 final CommandLine cmd = new CommandLine(migrator); 116 117 final int result = cmd.execute(args); 118 assertEquals(1, result); 119 } 120 121 @Test 122 public void testPlainOcflNoWorkingDirOption() throws Exception { 123 final Path targetDir = tmpDir.resolve("target"); 124 final String[] args = {"--target-dir", targetDir.toString(), 125 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 126 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 127 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 128 final PicocliMigrator migrator = new PicocliMigrator(); 129 final CommandLine cmd = new CommandLine(migrator); 130 131 cmd.execute(args); 132 final Path workingDir = Path.of(System.getProperty("user.dir")); 133 assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0"))); 134 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 135 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 136 } 137 138 @Test 139 public void testFedoraOcfl() throws Exception { 140 final Path targetDir = tmpDir.resolve("target"); 141 final Path workingDir = tmpDir.resolve("working"); 142 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 143 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 144 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 145 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 146 final PicocliMigrator migrator = new PicocliMigrator(); 147 final CommandLine cmd = new CommandLine(migrator); 148 149 cmd.execute(args); 150 assertTrue(Files.list(targetDir.resolve("data").resolve("ocfl-root")) 151 .anyMatch(element -> element.endsWith("0=ocfl_1.0"))); 152 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("index"))); 153 assertTrue(Files.list(workingDir).anyMatch(element -> element.endsWith("pid"))); 154 } 155 156 @Test 157 public void testExistingRepoDifferentStorageLayout() throws Exception { 158 //create repo with different storage layout 159 final Path targetDir = tmpDir.resolve("target"); 160 final var ocflRepo = new OcflRepositoryBuilder() 161 .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig()) 162 .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build()) 163 .workDir(tmpDir) 164 .build(); 165 assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0"))); 166 167 //migrate object into it 168 final Path workingDir = tmpDir.resolve("working"); 169 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 170 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 171 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 172 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 173 "--id-prefix", ""}; 174 final PicocliMigrator migrator = new PicocliMigrator(); 175 final CommandLine cmd = new CommandLine(migrator); 176 cmd.execute(args); 177 178 //verify that the correct storage layout was used - encapsulation directory is the encoded object id 179 assertTrue(Files.list(targetDir.resolve("750").resolve("677").resolve("e9b")) 180 .anyMatch(f -> f.endsWith("example%3a1"))); 181 } 182 183 @Test 184 public void testMigrateFoxmlFileInsteadOfPropertyFiles() throws Exception { 185 final Path targetDir = tmpDir.resolve("target"); 186 final Path workingDir = tmpDir.resolve("working"); 187 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 188 "--source-type", "LEGACY", "--migration-type", "PLAIN_OCFL", 189 "--datastreams-dir", "src/test/resources/legacyFS/datastreams/2015/0430/16/01", 190 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 191 "--id-prefix", "", "--foxml-file"}; 192 final PicocliMigrator migrator = new PicocliMigrator(); 193 final CommandLine cmd = new CommandLine(migrator); 194 cmd.execute(args); 195 196 final Path baseDir = targetDir.resolve("750").resolve("677").resolve("e9b") 197 .resolve("750677e9b953845ba5069d27a3775fbced186987fd0f4a8c968ac457a7d415a8"); 198 final File inventory = baseDir.resolve("inventory.json").toFile(); 199 assertTrue(inventory.exists()); 200 final var ocflRepo = new OcflRepositoryBuilder() 201 .defaultLayoutConfig(new HashedNTupleLayoutConfig()) 202 .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build()) 203 .workDir(workingDir) 204 .build(); 205 final var object = ocflRepo.getObject(ObjectVersionId.head("example:1")); 206 final ArrayList<String> files = new ArrayList<String>(); 207 for (final var file: object.getFiles()) { 208 files.add(file.getPath()); 209 } 210 final var expectedFiles = new ArrayList<String>(); 211 expectedFiles.add("AUDIT"); 212 expectedFiles.add("DS2"); 213 expectedFiles.add("DS1"); 214 expectedFiles.add("DS4"); 215 expectedFiles.add("DS3"); 216 expectedFiles.add("DC"); 217 assertEquals(expectedFiles, files); 218 //now check for a FOXML, which should show up in a previous version 219 final var versions = ocflRepo.describeObject("example:1").getVersionMap().values(); 220 boolean foundFoxml = false; 221 for (VersionDetails v : versions) { 222 for (FileDetails f : v.getFiles()) { 223 if (f.getPath().equals("FOXML")) { 224 foundFoxml = true; 225 break; 226 } 227 } 228 } 229 assertTrue(foundFoxml); 230 } 231 232 @Test 233 public void testInvalidDigestAlgorithm() throws Exception { 234 final Path targetDir = tmpDir.resolve("target"); 235 final Path workingDir = tmpDir.resolve("working"); 236 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 237 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 238 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 239 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 240 "--algorithm", "sha384"}; 241 final PicocliMigrator migrator = new PicocliMigrator(); 242 final CommandLine cmd = new CommandLine(migrator); 243 244 final int result = cmd.execute(args); 245 assertEquals(1, result); 246 } 247 248 /** 249 * MD5 is a supported algorithm under an OCFL extension, but we don't support it. 250 */ 251 @Test 252 public void testInvalidForUsDigestAlgorithm() { 253 final Path targetDir = tmpDir.resolve("target"); 254 final Path workingDir = tmpDir.resolve("working"); 255 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 256 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 257 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 258 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 259 "--algorithm", "md5"}; 260 final PicocliMigrator migrator = new PicocliMigrator(); 261 final CommandLine cmd = new CommandLine(migrator); 262 263 final int result = cmd.execute(args); 264 assertEquals(1, result); 265 } 266 267 @Test 268 public void testSha256DigestAlgorithm() throws Exception { 269 final Path targetDir = tmpDir.resolve("target"); 270 final Path workingDir = tmpDir.resolve("working"); 271 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 272 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 273 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 274 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 275 "--algorithm", "sha256"}; 276 final PicocliMigrator migrator = new PicocliMigrator(); 277 final CommandLine cmd = new CommandLine(migrator); 278 279 final int result = cmd.execute(args); 280 assertEquals(0, result); 281 assertTrue(Files.list(targetDir).anyMatch(element -> element.endsWith("0=ocfl_1.0"))); 282 final Path baseDir = targetDir.resolve("5b5").resolve("62d").resolve("d69") 283 .resolve("5b562dd698f17e3198e007e6f77f9e48f20a556c6bae84e6fc8d98544831daa6"); 284 final File inventory = baseDir.resolve("inventory.json").toFile(); 285 assertTrue(inventory.exists()); 286 validateManifests(inventory, "SHA-256", baseDir); 287 } 288 289 @Test 290 public void testPlainOcflObjectAlreadyExistsInOcfl() throws Exception { 291 final Path targetDir = tmpDir.resolve("target"); 292 final Path workingDir = tmpDir.resolve("working"); 293 final var pid = "example:1"; 294 final var ocflRepo = new OcflRepositoryBuilder() 295 .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig()) 296 .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build()) 297 .workDir(tmpDir) 298 .build(); 299 ocflRepo.updateObject(ObjectVersionId.head(pid), new VersionInfo(), updater -> { 300 updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1"); 301 }); 302 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 303 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 304 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 305 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01", 306 "--id-prefix", ""}; 307 final PicocliMigrator migrator = new PicocliMigrator(); 308 final CommandLine cmd = new CommandLine(migrator); 309 final int result = cmd.execute(args); 310 assertEquals(1, result); //should fail because object already exists 311 } 312 313 @Test 314 public void testFedoraOcflObjectAlreadyExistsInOcfl() throws Exception { 315 final Path targetDir = tmpDir.resolve("target"); 316 final Path workingDir = tmpDir.resolve("working"); 317 final var ocflObjectId = "info:fedora/example:1"; 318 final var ocflRepo = new OcflRepositoryBuilder() 319 .defaultLayoutConfig(new HashedNTupleIdEncapsulationLayoutConfig()) 320 .storage(FileSystemOcflStorage.builder().repositoryRoot(targetDir).build()) 321 .workDir(tmpDir) 322 .build(); 323 ocflRepo.updateObject(ObjectVersionId.head(ocflObjectId), new VersionInfo(), updater -> { 324 updater.writeFile(new ByteArrayInputStream("data".getBytes(StandardCharsets.UTF_8)),"file1"); 325 }); 326 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 327 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 328 "--datastreams-dir","src/test/resources/legacyFS/datastreams/2015/0430/16/01", 329 "--objects-dir", "src/test/resources/legacyFS/objects/2015/0430/16/01"}; 330 final PicocliMigrator migrator = new PicocliMigrator(); 331 final CommandLine cmd = new CommandLine(migrator); 332 final int result = cmd.execute(args); 333 assertEquals(1, result); //should fail because object already exists 334 } 335 336 @Test 337 public void testInvalidChecksumErrorsPlain() throws Exception { 338 final Path targetDir = tmpDir.resolve("target"); 339 final Path workingDir = tmpDir.resolve("working"); 340 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 341 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 342 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 343 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"}; 344 final PicocliMigrator migrator = new PicocliMigrator(); 345 final CommandLine cmd = new CommandLine(migrator); 346 final int result = cmd.execute(args); 347 assertEquals(1, result); //should fail because of invalid checksum 348 } 349 350 @Test 351 public void testInvalidChecksumErrorsFedora() throws Exception { 352 final Path targetDir = tmpDir.resolve("target"); 353 final Path workingDir = tmpDir.resolve("working"); 354 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 355 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 356 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 357 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01"}; 358 final PicocliMigrator migrator = new PicocliMigrator(); 359 final CommandLine cmd = new CommandLine(migrator); 360 final int result = cmd.execute(args); 361 assertEquals(1, result); //should fail because of invalid checksum 362 } 363 364 @Test 365 public void testInvalidChecksumCanBeAllowedPlain() throws Exception { 366 final Path targetDir = tmpDir.resolve("target"); 367 final Path workingDir = tmpDir.resolve("working"); 368 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 369 "--source-type", "LEGACY","--migration-type", "PLAIN_OCFL", 370 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 371 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01", 372 "--no-checksum-validation"}; 373 final PicocliMigrator migrator = new PicocliMigrator(); 374 final CommandLine cmd = new CommandLine(migrator); 375 final int result = cmd.execute(args); 376 assertEquals(0, result); //should succeed because checksum validation is disabled 377 } 378 379 @Test 380 public void testInvalidChecksumCanBeAllowedFedora() throws Exception { 381 final Path targetDir = tmpDir.resolve("target"); 382 final Path workingDir = tmpDir.resolve("working"); 383 final String[] args = {"--target-dir", targetDir.toString(), "--working-dir", workingDir.toString(), 384 "--source-type", "LEGACY","--migration-type", "FEDORA_OCFL", 385 "--datastreams-dir","src/test/resources/legacyFS-invalid-checksum/datastreams/2015/0430/16/01", 386 "--objects-dir", "src/test/resources/legacyFS-invalid-checksum/objects/2015/0430/16/01", 387 "--no-checksum-validation"}; 388 final PicocliMigrator migrator = new PicocliMigrator(); 389 final CommandLine cmd = new CommandLine(migrator); 390 final int result = cmd.execute(args); 391 assertEquals(0, result); //should succeed because checksum validation is disabled 392 } 393 394 /** 395 * Validate the manifest digests in an inventory file. 396 * @param inventory the inventory file 397 * @param digestAlgo the digest algorithm 398 * @param baseDir the path of the OCFL object 399 * @throws IOException issues opening the inventory file. 400 * @throws NoSuchAlgorithmException issues creating a MessageDigest. 401 */ 402 private void validateManifests(final File inventory, final String digestAlgo, final Path baseDir) 403 throws IOException, NoSuchAlgorithmException { 404 final var manifests = getManifests(inventory); 405 final MessageDigest md = MessageDigest.getInstance(digestAlgo); 406 for (final var entry : manifests.entrySet()) { 407 final File f = baseDir.resolve(entry.getKey()).toFile(); 408 assertTrue(f.exists()); 409 final String digest = new String(Hex.encodeHex(DigestUtils.digest(md, new FileInputStream(f)))); 410 assertEquals(entry.getValue(), digest); 411 } 412 } 413 414 /** 415 * Parse the manifest section out of an OCFL inventory file and return a map of filename -> hash 416 * @param inventory the OCFL inventory file 417 * @return map of file paths from the OCFL object root and their digests 418 * @throws IOException issues opening the inventory file. 419 */ 420 private Map<String, String> getManifests(final File inventory) throws IOException { 421 final ObjectMapper mapper = new ObjectMapper(); 422 final JsonNode rootNode = mapper.readTree(inventory); 423 final JsonNode manifestNode = rootNode.findValues("manifest").get(0); 424 final Map<String, String> fileManifest = new HashMap<>(); 425 final var fieldIter = manifestNode.fields(); 426 while (fieldIter.hasNext()) { 427 final var entry = fieldIter.next(); 428 final String hash = entry.getKey(); 429 if (entry.getValue().isArray()) { 430 // More than one file with the same hash 431 entry.getValue().spliterator().forEachRemaining(file -> fileManifest.put(file.asText(), hash)); 432 } else { 433 fileManifest.put(entry.getValue().asText(), hash); 434 } 435 } 436 return fileManifest; 437 } 438}