/*
 * The contents of this file are subject to the license and copyright detailed
 * in the LICENSE and NOTICE files at the root of the source tree.
 */
package org.duraspace.bagit;

import static org.duraspace.bagit.BagProfileConstants.ACCEPT_BAGIT_VERSION;
import static org.duraspace.bagit.BagProfileConstants.ACCEPT_SERIALIZATION;
import static org.duraspace.bagit.BagProfileConstants.ALLOW_FETCH_TXT;
import static org.duraspace.bagit.BagProfileConstants.BAGIT_PROFILE_INFO;
import static org.duraspace.bagit.BagProfileConstants.BAGIT_TAG_SUFFIX;
import static org.duraspace.bagit.BagProfileConstants.BAG_INFO;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_REQUIRED;
import static org.duraspace.bagit.BagProfileConstants.OTHER_INFO;
import static org.duraspace.bagit.BagProfileConstants.SERIALIZATION;
import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_REQUIRED;
import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_REQUIRED;
import static org.slf4j.LoggerFactory.getLogger;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import org.slf4j.Logger;

/**
 * A BagProfile contains the entire contents of a BagIt profile specified through the profile's json.
 *
 * All case-insensitive lookups (serialization values, built in identifiers, tag file section names) are
 * normalized with {@link Locale#ROOT} so that the results do not depend on the JVM's default locale.
 *
 * @author mikejritter
 * @author escowles
 * @since 2016-12-12
 */
public class BagProfile {

    public enum Serialization {
        FORBIDDEN, REQUIRED, OPTIONAL, UNKNOWN;

        /**
         * Retrieve the {@link Serialization} from a string representation
         *
         * The comparison is case-insensitive and locale independent.
         *
         * @param value the String value to use
         * @return the {@link Serialization} the {@code value} is equal to, or {@link #UNKNOWN} if the value is
         *         not one of "forbidden", "required" or "optional"
         */
        public static Serialization of(final String value) {
            // Locale.ROOT: the default locale may map 'I' to a dotless 'i' (e.g. tr_TR), breaking the match
            switch (value.toLowerCase(Locale.ROOT)) {
                case "forbidden": return FORBIDDEN;
                case "required": return REQUIRED;
                case "optional": return OPTIONAL;
                default: return UNKNOWN;
            }
        }
    }

    /**
     * Enum of the built in profiles which are provided with bagit-support
     */
    public enum BuiltIn {
        APTRUST("aptrust"),
        BEYOND_THE_REPOSITORY("beyondtherepository"),
        DEFAULT("default"),
        METAARCHIVE("metaarchive"),
        PERSEIDS("perseids");

        private final String identifier;

        /**
         * Create a built in profile entry
         *
         * @param identifier the identifier of the profile
         */
        BuiltIn(final String identifier) {
            this.identifier = identifier;
        }

        /**
         * Retrieve a built in profile from an identifier
         *
         * The comparison is case-insensitive and locale independent.
         *
         * @param identifier the identifier to retrieve a profile for
         * @return the {@link BuiltIn} profile
         * @throws IllegalArgumentException if the {@code identifier} is not supported
         */
        public static BuiltIn from(final String identifier) {
            final String normalized = identifier.toLowerCase(Locale.ROOT);
            // look the identifier up on the constants themselves so the list is only maintained in one place
            for (final BuiltIn profile : BuiltIn.values()) {
                if (profile.identifier.equals(normalized)) {
                    return profile;
                }
            }

            throw new IllegalArgumentException("Unsupported profile identifier. Accepted values are: " +
                                               Arrays.stream(BuiltIn.values())
                                                     .map(BuiltIn::getIdentifier)
                                                     .collect(Collectors.joining(", ")));
        }

        /**
         * Get the identifier associated with the profile
         *
         * @return the identifier
         */
        public String getIdentifier() {
            return identifier;
        }
    }

    private static final Logger logger = getLogger(BagProfile.class);

    // two groups: a wildcard matcher for the filename and the tag suffix; compiled once as it never changes
    private static final Pattern TAG_ENDING = Pattern.compile("(.*)(\\" + BAGIT_TAG_SUFFIX + ")");

    private boolean allowFetch;
    private Serialization serialization;

    private Set<String> acceptedBagItVersions;
    private Set<String> acceptedSerializations;

    private Set<String> tagFilesAllowed;
    private Set<String> tagFilesRequired;

    private Set<String> allowedPayloadAlgorithms;
    private Set<String> allowedTagAlgorithms;

    private Set<String> payloadDigestAlgorithms;
    private Set<String> tagDigestAlgorithms;

    private Map<String, Map<String, ProfileFieldRule>> metadataFields = new HashMap<>();
    private Map<String, String> profileMetadata = new HashMap<>();

    /**
     * Load a BagProfile from a {@link BuiltIn} profile type
     *
     * @param builtInProfile the supported profile to load
     * @throws IOException if there is an error reading the json
     * @throws NullPointerException if the json resource for the profile cannot be found on the classpath
     */
    public BagProfile(final BuiltIn builtInProfile) throws IOException {
        final String resource = "profiles/" + builtInProfile.identifier + ".json";
        final URL resourceURL = this.getClass().getClassLoader().getResource(resource);
        try (InputStream in = Objects.requireNonNull(resourceURL,
                                                     "Unable to find built in profile resource " + resource)
                                     .openStream()) {
            load(in);
        }
    }

    /**
     * Create a BagProfile from a given InputStream
     *
     * @param in InputStream containing the Bag profile JSON document
     * @throws IOException when there is an I/O error reading JSON
     */
    public BagProfile(final InputStream in) throws IOException {
        load(in);
    }

    /**
     * Parse the profile json and populate every field of this profile
     *
     * @param in InputStream containing the Bag profile JSON document
     * @throws IOException when there is an I/O error reading JSON
     */
    private void load(final InputStream in) throws IOException {
        final ObjectMapper mapper = new ObjectMapper();
        final JsonNode json = mapper.readTree(in);

        loadProfileInfo(json);

        // fetch.txt is allowed and serialization is optional unless the profile says otherwise
        allowFetch = json.has(ALLOW_FETCH_TXT) ? json.get(ALLOW_FETCH_TXT).asBoolean() : true;
        serialization = json.has(SERIALIZATION) ? Serialization.of(json.get(SERIALIZATION).asText())
                                                : Serialization.OPTIONAL;

        acceptedBagItVersions = arrayValues(json, ACCEPT_BAGIT_VERSION);
        acceptedSerializations = arrayValues(json, ACCEPT_SERIALIZATION);

        tagFilesAllowed = arrayValues(json, TAG_FILES_ALLOWED);
        tagFilesRequired = arrayValues(json, TAG_FILES_REQUIRED);

        allowedPayloadAlgorithms = arrayValues(json, MANIFESTS_ALLOWED);
        allowedTagAlgorithms = arrayValues(json, TAG_MANIFESTS_ALLOWED);

        payloadDigestAlgorithms = arrayValues(json, MANIFESTS_REQUIRED);
        tagDigestAlgorithms = arrayValues(json, TAG_MANIFESTS_REQUIRED);

        metadataFields.put(BAG_INFO.toLowerCase(Locale.ROOT), metadataFields(json.get(BAG_INFO)));

        if (json.get(OTHER_INFO) != null) {
            loadOtherTags(json);
        }
    }

    /**
     * Copy each entry of the BagIt-Profile-Info section, if present, into the profile metadata as text
     *
     * @param json the root of the profile json
     */
    private void loadProfileInfo(final JsonNode json) {
        final JsonNode tag = json.get(BAGIT_PROFILE_INFO);
        if (tag != null) {
            tag.fields().forEachRemaining(entry -> profileMetadata.put(entry.getKey(), entry.getValue().asText()));
        }
    }

    /**
     * Load the field rules for each additional tag file listed in the Other-Info array
     *
     * Section names are stored lower cased so that they can be matched against normalized filenames.
     *
     * @param json the root of the profile json
     */
    private void loadOtherTags(final JsonNode json) {
        final JsonNode arrayTags = json.get(OTHER_INFO);
        if (arrayTags != null && arrayTags.isArray()) {
            final Iterator<JsonNode> arrayEntries = arrayTags.elements();
            while (arrayEntries.hasNext()) {
                final JsonNode entries = arrayEntries.next();
                final Iterator<Map.Entry<String, JsonNode>> fields = entries.fields();
                while (fields.hasNext()) {
                    final Map.Entry<String, JsonNode> entry = fields.next();
                    final String tagName = entry.getKey().toLowerCase(Locale.ROOT);
                    metadataFields.put(tagName, metadataFields(entry.getValue()));
                }
            }
        }
        logger.debug("metadataFields is {}", metadataFields);
    }

    /**
     * Read the text of every element of the json array stored under {@code key}
     *
     * @param json the node containing the array
     * @param key the name of the array field
     * @return the set of values, or an empty set if the {@code key} is not present
     */
    private static Set<String> arrayValues(final JsonNode json, final String key) {
        final JsonNode values = json.get(key);

        if (values == null) {
            return Collections.emptySet();
        }

        final Set<String> results = new HashSet<>();
        for (int i = 0; i < values.size(); i++) {
            results.add(values.get(i).asText());
        }
        return results;
    }

    /**
     * Loads required tags and allowed values
     *
     * Fields which are not specified default to: not required, repeatable, not recommended, and a description
     * of "No description".
     *
     * @param json json to parse
     * @return map of tags => set of allowed values
     */
    private static Map<String, ProfileFieldRule> metadataFields(final JsonNode json) {
        if (json == null) {
            return Collections.emptyMap();
        }

        final Map<String, ProfileFieldRule> results = new HashMap<>();
        for (final Iterator<Map.Entry<String, JsonNode>> it = json.fields(); it.hasNext(); ) {
            final Map.Entry<String, JsonNode> entry = it.next();
            final JsonNode field = entry.getValue();

            // read each of the fields for the ProfileFieldRule:
            // required, repeated, recommended, description, and values
            final JsonNode requiredNode = field.get("required");
            final boolean required = requiredNode != null && requiredNode.asBoolean();

            final JsonNode repeatedNode = field.get("repeatable");
            final boolean repeatable = repeatedNode == null || repeatedNode.asBoolean();

            final JsonNode recommendedNode = field.get("recommended");
            final boolean recommended = recommendedNode != null && recommendedNode.asBoolean();

            String description = "No description";
            final JsonNode descriptionNode = field.get("description");
            if (descriptionNode != null && !descriptionNode.asText().isEmpty()) {
                description = descriptionNode.asText();
            }

            final Set<String> values = arrayValues(field, "values");

            results.put(entry.getKey(),
                        new ProfileFieldRule(required, repeatable, recommended, description, values));
        }

        return results;
    }

    /**
     * Retrieve the BagIt-Profile-Identifier for this profile
     *
     * @return the BagIt-Profile-Identifier, or an empty string if none is found
     */
    public String getIdentifier() {
        return profileMetadata.getOrDefault(BagProfileConstants.BAGIT_PROFILE_IDENTIFIER, "");
    }

    /**
     * Boolean flag allowing a fetch.txt file
     *
     * @return true if fetch.txt is allowed, false otherwise
     */
    public boolean isAllowFetch() {
        return allowFetch;
    }

    /**
     * Get the support of serialization for a Bag.
     *
     * Allowed values are: forbidden, required, and optional
     *
     * @return the {@link Serialization} for the profile; {@link Serialization#UNKNOWN} if the profile
     *         specified a value which is not recognized
     */
    public Serialization getSerialization() {
        return serialization;
    }

    /**
     * Get the supported BagIt versions
     *
     * @return Set of BagIt version numbers
     */
    public Set<String> getAcceptedBagItVersions() {
        return acceptedBagItVersions;
    }

    /**
     * Get the supported serialization formats
     *
     * If {@link BagProfile#getSerialization()} has a value of required or optional, at least one value is needed.
     * If {@link BagProfile#getSerialization()} is forbidden, this has no meaning
     *
     * @return Set of serialization formats
     */
    public Set<String> getAcceptedSerializations() {
        return acceptedSerializations;
    }

    /**
     * Get the names of allowed tag files; supports unix style globbing
     *
     * All the tag files listed in {@link BagProfile#getTagFilesRequired()} must be included in this
     *
     * @return Set of allowed tag files
     */
    public Set<String> getTagFilesAllowed() {
        return tagFilesAllowed;
    }

    /**
     * Get the tag files which are required to exist
     *
     * @return Set of tag filenames
     */
    public Set<String> getTagFilesRequired() {
        return tagFilesRequired;
    }

    /**
     * Get the payload algorithms which are allowed
     *
     * When specified along with {@link BagProfile#getPayloadDigestAlgorithms()}, this must include at least all of the
     * manifest types listed in {@link BagProfile#getPayloadDigestAlgorithms()}.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getAllowedPayloadAlgorithms() {
        return allowedPayloadAlgorithms;
    }

    /**
     * Get the tag manifest algorithms which are allowed.
     *
     * When specified along with {@link BagProfile#getTagDigestAlgorithms()}, this must include at least all of the tag
     * manifest types listed in {@link BagProfile#getTagDigestAlgorithms()}.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getAllowedTagAlgorithms() {
        return allowedTagAlgorithms;
    }

    /**
     * Get the required digest algorithms for payload manifests.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getPayloadDigestAlgorithms() {
        return payloadDigestAlgorithms;
    }

    /**
     * Get the required digest algorithms for tag manifests.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getTagDigestAlgorithms() {
        return tagDigestAlgorithms;
    }

    /**
     * Get the required Bag-Info metadata fields.
     *
     * @return A map of field names to a ProfileFieldRule containing acceptance criteria
     */
    public Map<String, ProfileFieldRule> getMetadataFields() {
        return getMetadataFields(BAG_INFO);
    }

    /**
     * Get the required tags for the extra tag file
     *
     * @param tagFile the tag file to get tags for; matched case-insensitively
     * @return map of tag = set of acceptable values, or null if tagFile doesn't exist
     */
    public Map<String, ProfileFieldRule> getMetadataFields(final String tagFile) {
        return metadataFields.get(tagFile.toLowerCase(Locale.ROOT));
    }

    /**
     * Get all the section names in this profile, which can be used with getMetadataFields().
     *
     * @return set of section names
     */
    public Set<String> getSectionNames() {
        return metadataFields.keySet();
    }

    /**
     * Get the BagIt-Profile-Info section describing the BagIt Profile
     *
     * @return map of fields names to text descriptions
     */
    public Map<String, String> getProfileMetadata() {
        return profileMetadata;
    }

    /**
     * Validate a given BagConfig against the current profile
     *
     * @param config the BagConfig
     * @throws RuntimeException if a required tag file is missing or a tag file fails validation
     */
    public void validateConfig(final BagConfig config) {
        checkRequiredTagsExist(config.getTagFiles());
        for (final String section : config.getTagFiles()) {
            validateTag(section, config.getFieldsForTagFile(section));
        }
    }

    /**
     * Validate a configuration for tag files based on a mapping of BagIt tag filenames to key-value pairs.
     *
     * e.g. the filename "bag-info.txt" could contain the pairs "Source-Organization: DuraSpace" and
     * "Organization-Address: The Cloud"
     *
     * @param config the Map containing the configuration of BagIt tag files
     * @throws RuntimeException if a required tag file is missing or a tag file fails validation
     */
    public void validateTagFiles(final Map<String, Map<String, String>> config) {
        checkRequiredTagsExist(config.keySet());
        config.forEach(this::validateTag);
    }

    /**
     * Test that all required tag files exist
     *
     * @param tags the name of each tag file to check
     * @throws RuntimeException if a section of the profile has no matching tag file in {@code tags}
     */
    private void checkRequiredTagsExist(final Set<String> tags) {
        for (final String section : metadataFields.keySet()) {
            final String expected = section + BAGIT_TAG_SUFFIX;
            if (!tags.contains(expected)) {
                throw new RuntimeException("Missing configuration for required tag file " + expected);
            }
        }
    }

    /**
     * Validate a Mapping of key value pairs for a tag file
     *
     * Tag files which have no section in this profile are not validated.
     *
     * @param filename the name of the tag file to validate
     * @param fields A mapping of tag file names and their fields to validate
     * @throws RuntimeException wrapping the {@link ProfileValidationException} if validation fails
     */
    private void validateTag(final String filename, final Map<String, String> fields) {
        // strip the trailing file extension
        final String section = getSection(filename);
        logger.debug("Checking validation for {}", section);
        if (metadataFields.containsKey(section)) {
            try {
                ProfileValidationUtil.validate(section, getMetadataFields(section), fields);
                ProfileValidationUtil.validateTagIsAllowed(Paths.get(filename), tagFilesAllowed);
            } catch (ProfileValidationException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }

    /**
     * Normalize a filename to be what we expect is held in the MetadataFields key set
     *
     * @param filename the filename to normalize
     * @return the lower cased filename without a tag extension, so that it can be used with the metadataFields
     */
    private String getSection(final String filename) {
        // the replacement keeps just the first capture group, i.e. everything before the tag suffix
        final String replacement = "$1";
        final Matcher matcher = TAG_ENDING.matcher(filename.toLowerCase(Locale.ROOT));
        return matcher.replaceAll(replacement);
    }

    /**
     * Validate a given {@link Bag} against the current profile
     *
     * All checks are run and their failures collected, so a single exception reports every problem found.
     *
     * @param bag the Bag
     * @throws RuntimeException if the bag does not conform to this profile
     */
    public void validateBag(final Bag bag) {
        logger.info("Starting Bag to BagProfile conformance validator");

        final String tagIdentifier = "tag";
        final String fetchIdentifier = "fetch.txt";
        final String payloadIdentifier = "payload";
        final StringBuilder errors = new StringBuilder();

        final Path root = bag.getRootDir();
        final Set<Manifest> foundPayloadManifests = bag.getPayLoadManifests();
        final Set<Manifest> foundTagManifests = bag.getTagManifests();

        // check fetch rule
        if (!allowFetch && (!bag.getItemsToFetch().isEmpty() || Files.exists(root.resolve(fetchIdentifier)))) {
            errors.append("Profile does not allow a fetch.txt but fetch file found!\n");
        }

        // check payload manifest algorithms
        errors.append(ProfileValidationUtil.validateManifest(foundPayloadManifests, payloadDigestAlgorithms,
                                                             allowedPayloadAlgorithms, payloadIdentifier));

        // check tag manifest rules files allowed
        // the reporting can be redundant if no tag manifests are found, so only check the allowed algorithms and
        // tag files IF we have at least one tag manifest
        if (foundTagManifests.isEmpty()) {
            errors.append("No tag manifest found!\n");
        } else {
            errors.append(ProfileValidationUtil.validateManifest(foundTagManifests, tagDigestAlgorithms,
                                                                 allowedTagAlgorithms, tagIdentifier));

            // grab the first tag manifest and use that to check all registered tag files
            final Manifest manifest = foundTagManifests.iterator().next();
            final Set<Path> existingTagFiles = manifest.getFileToChecksumMap().keySet();

            for (final Path tag : existingTagFiles) {
                final Path relativePath = tag.startsWith(root) ? root.relativize(tag) : tag;
                try {
                    ProfileValidationUtil.validateTagIsAllowed(relativePath, tagFilesAllowed);
                } catch (ProfileValidationException e) {
                    errors.append(e.getMessage());
                }
            }
        }

        // check all required tag files exist
        for (final String tagName : tagFilesRequired) {
            final Path requiredTag = root.resolve(tagName);
            if (!Files.exists(requiredTag)) {
                errors.append("Required tag file \"").append(tagName).append("\" does not exist!\n");
            }
        }

        // check *-info required fields
        for (final String section : metadataFields.keySet()) {
            final String tagFile = section.toLowerCase(Locale.ROOT) + BAGIT_TAG_SUFFIX;
            final Path resolved = root.resolve(tagFile);
            try {
                ProfileValidationUtil.validate(section, metadataFields.get(section), resolved);
            } catch (IOException e) {
                // error - could not read info; keep the cause in the log as the report only names the file
                logger.debug("Could not read info from {}", resolved, e);
                errors.append("Could not read info from \"").append(tagFile).append("\"!\n");
            } catch (ProfileValidationException e) {
                errors.append(e.getMessage());
            }
        }

        // check allowed bagit versions
        if (!acceptedBagItVersions.contains(bag.getVersion().toString())) {
            errors.append("BagIt version incompatible; accepted versions are ")
                  .append(acceptedBagItVersions)
                  .append("\n");
        }

        // serialization seems unnecessary as the import export tool does not support importing serialized bags
        if (serialization == Serialization.REQUIRED) {
            logger.warn("Bag Profile requires serialization, import will continue if the bag has been deserialized");
        }

        // finally, if we have any errors throw an exception
        if (errors.length() > 0) {
            throw new RuntimeException("Bag profile validation failure: The following errors occurred: \n" +
                                       errors);
        }
    }
}