/*
 * The contents of this file are subject to the license and copyright detailed
 * in the LICENSE and NOTICE files at the root of the source tree.
 */
package org.duraspace.bagit;

import static org.duraspace.bagit.BagProfileConstants.ACCEPT_BAGIT_VERSION;
import static org.duraspace.bagit.BagProfileConstants.ACCEPT_SERIALIZATION;
import static org.duraspace.bagit.BagProfileConstants.ALLOW_FETCH_TXT;
import static org.duraspace.bagit.BagProfileConstants.BAGIT_PROFILE_INFO;
import static org.duraspace.bagit.BagProfileConstants.BAG_INFO;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_REQUIRED;
import static org.duraspace.bagit.BagProfileConstants.OTHER_INFO;
import static org.duraspace.bagit.BagProfileConstants.SERIALIZATION;
import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_REQUIRED;
import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_REQUIRED;
import static org.slf4j.LoggerFactory.getLogger;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import org.slf4j.Logger;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;

/**
 * A BagProfile contains the entire contents of a BagIt profile specified through the profile's json.
 *
 * @author mikejritter
 * @author escowles
 * @since 2016-12-12
 */
public class BagProfile {

    /**
     * The serialization rule of a profile: whether a serialized bag is forbidden, required, or optional.
     * {@link #UNKNOWN} is used for any value which is not recognized.
     */
    public enum Serialization {
        FORBIDDEN, REQUIRED, OPTIONAL, UNKNOWN;

        /**
         * Retrieve the {@link Serialization} from a string representation. The comparison is case insensitive.
         *
         * @param value the String value to use
         * @return the {@link Serialization} the {@code value} is equal to, or {@link #UNKNOWN} if there is no match
         */
        public static Serialization of(final String value) {
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale (e.g. the Turkish dotless i)
            switch (value.toLowerCase(Locale.ROOT)) {
                case "forbidden": return FORBIDDEN;
                case "required": return REQUIRED;
                case "optional": return OPTIONAL;
                default: return UNKNOWN;
            }
        }
    }

    /**
     * Enum of the built in profiles which are provided with bagit-support
     */
    public enum BuiltIn {
        APTRUST("aptrust"),
        BEYOND_THE_REPOSITORY("beyondtherepository"),
        DEFAULT("default"),
        METAARCHIVE("metaarchive"),
        PERSEIDS("perseids");

        private final String identifier;

        /**
         * Default constructor
         *
         * @param identifier the identifier of the profile
         */
        BuiltIn(final String identifier) {
            this.identifier = identifier;
        }

        /**
         * Retrieve a built in profile from an identifier. The comparison is case insensitive.
         *
         * @param identifier the identifier to retrieve a profile for
         * @return the {@link BuiltIn} profile
         * @throws IllegalArgumentException if the {@code identifier} is not supported
         */
        public static BuiltIn from(final String identifier) {
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale
            final String normalized = identifier.toLowerCase(Locale.ROOT);

            // match against the identifiers held by the constants so that the two can not drift apart
            return Arrays.stream(BuiltIn.values())
                         .filter(profile -> profile.identifier.equals(normalized))
                         .findFirst()
                         .orElseThrow(() -> new IllegalArgumentException(
                             "Unsupported profile identifier. Accepted values are: " +
                             Arrays.stream(BuiltIn.values())
                                   .map(BuiltIn::getIdentifier)
                                   .collect(Collectors.joining(", "))));
        }

        /**
         * Get the identifier associated with the profile
         *
         * @return the identifier
         */
        public String getIdentifier() {
            return identifier;
        }
    }

    private static final Logger logger = getLogger(BagProfile.class);

    private boolean allowFetch;
    private Serialization serialization;

    private Set<String> acceptedBagItVersions;
    private Set<String> acceptedSerializations;

    private Set<String> tagFilesAllowed;
    private Set<String> tagFilesRequired;

    private Set<String> allowedPayloadAlgorithms;
    private Set<String> allowedTagAlgorithms;

    private Set<String> payloadDigestAlgorithms;
    private Set<String> tagDigestAlgorithms;

    private Set<String> sections = new HashSet<>();
    private Map<String, Map<String, ProfileFieldRule>> metadataFields = new HashMap<>();
    private Map<String, String> profileMetadata = new HashMap<>();

    /**
     * Load a BagProfile from a {@link BuiltIn} profile type
     *
     * @param builtInProfile the supported profile to load
     * @throws IOException if there is an error reading the json
     * @throws NullPointerException if the json resource for the profile is not found on the classpath
     */
    public BagProfile(final BuiltIn builtInProfile) throws IOException {
        final String resource = "profiles/" + builtInProfile.getIdentifier() + ".json";
        final URL resourceURL = this.getClass().getClassLoader().getResource(resource);
        // fail with a message naming the resource rather than an anonymous NullPointerException
        Objects.requireNonNull(resourceURL, "Unable to find built in profile resource: " + resource);
        try (InputStream in = resourceURL.openStream()) {
            load(in);
        }
    }

    /**
     * Create a BagProfile from a given InputStream
     *
     * @param in InputStream containing the Bag profile JSON document
     * @throws IOException when there is an I/O error reading JSON
     */
    public BagProfile(final InputStream in) throws IOException {
        load(in);
    }

    /**
     * Parse the profile json and populate every field of this profile.
     *
     * @param in the stream to read the profile json from
     * @throws IOException if the json can not be read
     */
    private void load(final InputStream in) throws IOException {
        final ObjectMapper mapper = new ObjectMapper();
        final JsonNode json = mapper.readTree(in);

        loadProfileInfo(json);

        // fetch.txt is allowed and serialization is optional unless the profile says otherwise
        allowFetch = !json.has(ALLOW_FETCH_TXT) || json.get(ALLOW_FETCH_TXT).asBoolean();
        serialization = json.has(SERIALIZATION) ? Serialization.of(json.get(SERIALIZATION).asText())
                                                : Serialization.OPTIONAL;

        acceptedBagItVersions = arrayValues(json, ACCEPT_BAGIT_VERSION);
        acceptedSerializations = arrayValues(json, ACCEPT_SERIALIZATION);

        tagFilesAllowed = arrayValues(json, TAG_FILES_ALLOWED);
        tagFilesRequired = arrayValues(json, TAG_FILES_REQUIRED);

        allowedPayloadAlgorithms = arrayValues(json, MANIFESTS_ALLOWED);
        allowedTagAlgorithms = arrayValues(json, TAG_MANIFESTS_ALLOWED);

        payloadDigestAlgorithms = arrayValues(json, MANIFESTS_REQUIRED);
        tagDigestAlgorithms = arrayValues(json, TAG_MANIFESTS_REQUIRED);

        // the Bag-Info section is always registered, even when the profile does not define any fields for it
        metadataFields.put(BAG_INFO, metadataFields(json, BAG_INFO));
        sections.add(BAG_INFO);

        if (json.get(OTHER_INFO) != null) {
            loadOtherTags(json);
        }
    }

    /**
     * Copy each entry of the BagIt-Profile-Info object, if present, into the profile metadata as text.
     *
     * @param json the root of the profile json
     */
    private void loadProfileInfo(final JsonNode json) {
        final JsonNode tag = json.get(BAGIT_PROFILE_INFO);
        if (tag != null) {
            tag.fields().forEachRemaining(entry -> profileMetadata.put(entry.getKey(), entry.getValue().asText()));
        }
    }

    /**
     * Register the additional sections listed in the {@code OTHER_INFO} array. Each field name of each array
     * element becomes a section name with its own set of field rules.
     *
     * @param json the root of the profile json
     */
    private void loadOtherTags(final JsonNode json) {
        final JsonNode arrayTags = json.get(OTHER_INFO);
        if (arrayTags != null && arrayTags.isArray()) {
            for (final JsonNode entries : arrayTags) {
                entries.fieldNames().forEachRemaining(tagName -> {
                    sections.add(tagName);
                    metadataFields.put(tagName, metadataFields(entries, tagName));
                });
            }
        }
        logger.debug("tagFiles is {}", sections);
        logger.debug("metadataFields is {}", metadataFields);
    }

    /**
     * Read the elements stored under {@code key} as a set of their text values.
     *
     * @param json the node to read from
     * @param key the name of the field holding the values
     * @return the text of each element, or an empty set if {@code key} is not present
     */
    private static Set<String> arrayValues(final JsonNode json, final String key) {
        final JsonNode values = json.get(key);

        if (values == null) {
            return Collections.emptySet();
        }

        final Set<String> results = new HashSet<>();
        for (int i = 0; i < values.size(); i++) {
            results.add(values.get(i).asText());
        }
        return results;
    }

    /**
     * Loads required tags and allowed values
     *
     * @param json json to parse
     * @param key key in json to load tags from
     * @return map of tags => set of allowed values
     */
    private static Map<String, ProfileFieldRule> metadataFields(final JsonNode json, final String key) {
        final JsonNode fields = json.get(key);

        if (fields == null) {
            return Collections.emptyMap();
        }

        final Map<String, ProfileFieldRule> results = new HashMap<>();
        for (final Iterator<String> it = fields.fieldNames(); it.hasNext(); ) {
            final String name = it.next();
            final JsonNode field = fields.get(name);

            // read each of the fields for the ProfileFieldRule:
            // required, repeatable, recommended, description, and values
            // when an attribute is absent: required = false, repeatable = true, recommended = false
            final JsonNode requiredNode = field.get("required");
            final boolean required = requiredNode != null && requiredNode.asBoolean();

            final JsonNode repeatedNode = field.get("repeatable");
            final boolean repeatable = repeatedNode == null || repeatedNode.asBoolean();

            final JsonNode recommendedNode = field.get("recommended");
            final boolean recommended = recommendedNode != null && recommendedNode.asBoolean();

            // only a non-empty description replaces the placeholder text
            final JsonNode descriptionNode = field.get("description");
            final String description = descriptionNode != null && !descriptionNode.asText().isEmpty()
                                       ? descriptionNode.asText()
                                       : "No description";

            final Set<String> values = arrayValues(field, "values");

            results.put(name, new ProfileFieldRule(required, repeatable, recommended, description, values));
        }

        return results;
    }

    /**
     * Boolean flag allowing a fetch.txt file
     *
     * @return true if fetch.txt is allowed, false otherwise
     */
    public boolean isAllowFetch() {
        return allowFetch;
    }

    /**
     * Get the support of serialization for a Bag.
     *
     * Allowed values are: forbidden, required, and optional
     *
     * @return the {@link Serialization} rule of the profile; {@link Serialization#UNKNOWN} if the profile
     *         contained a value which was not recognized
     */
    public Serialization getSerialization() {
        return serialization;
    }

    /**
     * Get the supported BagIt versions
     *
     * @return Set of BagIt version numbers
     */
    public Set<String> getAcceptedBagItVersions() {
        return acceptedBagItVersions;
    }

    /**
     * Get the supported serialization formats
     *
     * If {@link BagProfile#getSerialization()} has a value of required or optional, at least one value is needed.
     * If {@link BagProfile#getSerialization()} is forbidden, this has no meaning
     *
     * @return Set of serialization formats
     */
    public Set<String> getAcceptedSerializations() {
        return acceptedSerializations;
    }

    /**
     * Get the names of allowed tag files; supports unix style globbing
     *
     * All the tag files listed in {@link BagProfile#getTagFilesRequired()} must be included in this set
     *
     * @return Set of allowed tag files
     */
    public Set<String> getTagFilesAllowed() {
        return tagFilesAllowed;
    }

    /**
     * Get the tag files which are required to exist
     *
     * @return Set of tag filenames
     */
    public Set<String> getTagFilesRequired() {
        return tagFilesRequired;
    }

    /**
     * Get the payload algorithms which are allowed
     *
     * When specified along with {@link BagProfile#getPayloadDigestAlgorithms()}, this must include at least all of the
     * manifest types listed in {@link BagProfile#getPayloadDigestAlgorithms()}.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getAllowedPayloadAlgorithms() {
        return allowedPayloadAlgorithms;
    }

    /**
     * Get the tag manifest algorithms which are allowed.
     *
     * When specified along with {@link BagProfile#getTagDigestAlgorithms()}, this must include at least all of the tag
     * manifest types listed in {@link BagProfile#getTagDigestAlgorithms()}.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getAllowedTagAlgorithms() {
        return allowedTagAlgorithms;
    }

    /**
     * Get the required digest algorithms for payload manifests.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getPayloadDigestAlgorithms() {
        return payloadDigestAlgorithms;
    }

    /**
     * Get the required digest algorithms for tag manifests.
     *
     * @return Set of digest algorithm names
     */
    public Set<String> getTagDigestAlgorithms() {
        return tagDigestAlgorithms;
    }

    /**
     * Get the required Bag-Info metadata fields.
     *
     * @return A map of field names to a ProfileFieldRule containing acceptance criteria
     */
    public Map<String, ProfileFieldRule> getMetadataFields() {
        return getMetadataFields(BAG_INFO);
    }

    /**
     * Get the required tags for the extra tag file
     *
     * @param tagFile the tag file to get tags for
     * @return map of tag = set of acceptable values, or null if tagFile doesn't exist
     */
    public Map<String, ProfileFieldRule> getMetadataFields(final String tagFile) {
        return metadataFields.get(tagFile);
    }

    /**
     * Get all the section names in this profile, which can be used with getMetadataFields().
     *
     * @return set of section names
     */
    public Set<String> getSectionNames() {
        return sections;
    }

    /**
     * Get the BagIt-Profile-Info section describing the BagIt Profile
     *
     * @return map of fields names to text descriptions
     */
    public Map<String, String> getProfileMetadata() {
        return profileMetadata;
    }

    /**
     * Validate a given BagConfig against the current profile
     *
     * @param config the BagConfig
     * @throws RuntimeException if a section of the profile is missing from the config, or if a section fails
     *         validation
     */
    public void validateConfig(final BagConfig config) {
        for (final String section : sections) {
            // each section is expected in a tag file named after the lower cased section, e.g. bag-info.txt
            final String tagFile = section.toLowerCase() + ".txt";
            if (config.hasTagFile(tagFile)) {
                try {
                    ProfileValidationUtil.validate(section, getMetadataFields(section),
                                                   config.getFieldsForTagFile(tagFile));

                    ProfileValidationUtil.validateTagIsAllowed(Paths.get(tagFile), tagFilesAllowed);
                } catch (ProfileValidationException e) {
                    throw new RuntimeException(e.getMessage(), e);
                }
            } else {
                throw new RuntimeException(String.format("Error missing section %s from bag config", section));
            }
        }
    }


    /**
     * Validate a given {@link Bag} against the current profile. All failures are collected and reported together.
     *
     * @param bag the Bag
     * @throws RuntimeException if the bag does not conform to the profile; the message lists each failure
     */
    public void validateBag(final Bag bag) {
        logger.info("Starting Bag to BagProfile conformance validator");

        final String tagIdentifier = "tag";
        final String fetchIdentifier = "fetch.txt";
        final String payloadIdentifier = "payload";
        final StringBuilder errors = new StringBuilder();

        final Path root = bag.getRootDir();
        final Set<Manifest> foundPayloadManifests = bag.getPayLoadManifests();
        final Set<Manifest> foundTagManifests = bag.getTagManifests();

        // check fetch rule
        if (!allowFetch && (!bag.getItemsToFetch().isEmpty() || Files.exists(root.resolve(fetchIdentifier)))) {
            errors.append("Profile does not allow a fetch.txt but fetch file found!\n");
        }

        // check payload manifest algorithms
        errors.append(ProfileValidationUtil.validateManifest(foundPayloadManifests, payloadDigestAlgorithms,
                                                             allowedPayloadAlgorithms, payloadIdentifier));

        // check tag manifest rules files allowed
        // the reporting can be redundant if no tag manifests are found, so only check the allowed algorithms and
        // tag files IF we have at least one tag manifest
        if (foundTagManifests.isEmpty()) {
            errors.append("No tag manifest found!\n");
        } else {
            errors.append(ProfileValidationUtil.validateManifest(foundTagManifests, tagDigestAlgorithms,
                                                                 allowedTagAlgorithms, tagIdentifier));

            // grab the first tag manifest and use that to check all registered tag files
            final Manifest manifest = foundTagManifests.iterator().next();
            final Set<Path> existingTagFiles = manifest.getFileToChecksumMap().keySet();

            for (final Path tag : existingTagFiles) {
                // the allowed tag files are compared against paths relative to the bag root
                final Path relativePath = tag.startsWith(root) ? root.relativize(tag) : tag;
                try {
                    ProfileValidationUtil.validateTagIsAllowed(relativePath, tagFilesAllowed);
                } catch (ProfileValidationException e) {
                    errors.append(e.getMessage());
                }
            }
        }

        // check all required tag files exist
        for (final String tagName : tagFilesRequired) {
            final Path requiredTag = root.resolve(tagName);
            if (!Files.exists(requiredTag)) {
                errors.append("Required tag file \"").append(tagName).append("\" does not exist!\n");
            }
        }

        // check *-info required fields
        for (final String section : sections) {
            final String tagFile = section.toLowerCase() + ".txt";
            final Path resolved = root.resolve(tagFile);
            try {
                ProfileValidationUtil.validate(section, metadataFields.get(section), resolved);
            } catch (IOException e) {
                // error - could not read info
                errors.append("Could not read info from \"").append(tagFile).append("\"!\n");
            } catch (ProfileValidationException e) {
                errors.append(e.getMessage());
            }
        }

        // check allowed bagit versions
        if (!acceptedBagItVersions.contains(bag.getVersion().toString())) {
            errors.append("BagIt version incompatible; accepted versions are ")
                  .append(acceptedBagItVersions)
                  .append("\n");
        }

        // serialization seems unnecessary as the import export tool does not support importing serialized bags
        if (serialization == Serialization.REQUIRED) {
            logger.warn("Bag Profile requires serialization, import will continue if the bag has been deserialized");
        }

        // finally, if we have any errors throw an exception
        if (errors.length() > 0) {
            throw new RuntimeException("Bag profile validation failure: The following errors occurred: \n" +
                                       errors);
        }
    }
}