001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
import static org.duraspace.bagit.BagProfileConstants.ACCEPT_BAGIT_VERSION;
import static org.duraspace.bagit.BagProfileConstants.ACCEPT_SERIALIZATION;
import static org.duraspace.bagit.BagProfileConstants.ALLOW_FETCH_TXT;
import static org.duraspace.bagit.BagProfileConstants.BAGIT_PROFILE_INFO;
import static org.duraspace.bagit.BagProfileConstants.BAGIT_TAG_SUFFIX;
import static org.duraspace.bagit.BagProfileConstants.BAG_INFO;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_REQUIRED;
import static org.duraspace.bagit.BagProfileConstants.OTHER_INFO;
import static org.duraspace.bagit.BagProfileConstants.SERIALIZATION;
import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_REQUIRED;
import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_ALLOWED;
import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_REQUIRED;
import static org.slf4j.LoggerFactory.getLogger;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import org.slf4j.Logger;
046
047/**
048 * A BagProfile contains the entire contents of a BagIt profile specified through the profile's json.
049 *
050 * @author mikejritter
051 * @author escowles
052 * @since 2016-12-12
053 */
054public class BagProfile {
055
056    public enum Serialization {
057        FORBIDDEN, REQUIRED, OPTIONAL, UNKNOWN;
058
059        /**
060         * Retrieve the {@link Serialization} from a string representation
061         *
062         * @param value the String value to use
063         * @return the {@link Serialization} the {@code value} is equal to
064         */
065        public static Serialization of(final String value) {
066            switch (value.toLowerCase()) {
067                case "forbidden": return FORBIDDEN;
068                case "required": return REQUIRED;
069                case "optional": return OPTIONAL;
070                default: return UNKNOWN;
071            }
072        }
073    }
074
075    /**
076     * Enum of the built in profiles which are provided with bagit-support
077     */
078    public enum BuiltIn {
079        APTRUST("aptrust"),
080        BEYOND_THE_REPOSITORY("beyondtherepository"),
081        DEFAULT("default"),
082        METAARCHIVE("metaarchive"),
083        PERSEIDS("perseids");
084
085        private final String identifier;
086
087        /**
088         * Default constructor
089         *
090         * @param identifier the identifier of the profile
091         */
092        BuiltIn(final String identifier) {
093            this.identifier = identifier;
094        }
095
096        /**
097         * Retrieve a built in profile from an identifier
098         *
099         * @param identifier the identifier to retrieve a profile for
100         * @return the {@link BuiltIn} profile
101         * @throws IllegalArgumentException if the {@code identifier} is not supported
102         */
103        public static BuiltIn from(final String identifier) {
104            switch (identifier.toLowerCase()) {
105                case "aptrust": return APTRUST;
106                case "beyondtherepository": return BEYOND_THE_REPOSITORY;
107                case "default": return DEFAULT;
108                case "metaarchive": return METAARCHIVE;
109                case "perseids": return PERSEIDS;
110                default: throw new IllegalArgumentException("Unsupported profile identifier. Accepted values are: " +
111                                                            Arrays.stream(BuiltIn.values())
112                                                                  .map(BuiltIn::getIdentifier)
113                                                                  .collect(Collectors.joining(", ")));
114            }
115        }
116
117        /**
118         * Get the identifier associated with the profile
119         *
120         * @return the identifier
121         */
122        public String getIdentifier() {
123            return identifier;
124        }
125    }
126
127    private static final Logger logger = getLogger(BagProfile.class);
128
129    private boolean allowFetch;
130    private Serialization serialization;
131
132    private Set<String> acceptedBagItVersions;
133    private Set<String> acceptedSerializations;
134
135    private Set<String> tagFilesAllowed;
136    private Set<String> tagFilesRequired;
137
138    private Set<String> allowedPayloadAlgorithms;
139    private Set<String> allowedTagAlgorithms;
140
141    private Set<String> payloadDigestAlgorithms;
142    private Set<String> tagDigestAlgorithms;
143
144    private Map<String, Map<String, ProfileFieldRule>> metadataFields = new HashMap<>();
145    private Map<String, String> profileMetadata = new HashMap<>();
146
147    /**
148     * Load a BagProfile from a {@link BuiltIn} profile type
149     *
150     * @param builtInProfile the supported profile to load
151     * @throws IOException if there is an error reading the json
152     */
153    public BagProfile(final BuiltIn builtInProfile) throws IOException {
154        final String resource = "profiles/" + builtInProfile.identifier + ".json";
155        final URL resourceURL = this.getClass().getClassLoader().getResource(resource);
156        try (InputStream in = Objects.requireNonNull(resourceURL).openStream()) {
157            load(in);
158        }
159    }
160
161    /**
162     * Create a BagProfile from a given InputStream
163     *
164     * @param in InputStream containing the Bag profile JSON document
165     * @throws IOException when there is an I/O error reading JSON
166     */
167    public BagProfile(final InputStream in) throws IOException {
168        load(in);
169    }
170
171    private void load(final InputStream in) throws IOException {
172        final ObjectMapper mapper = new ObjectMapper();
173        final JsonNode json = mapper.readTree(in);
174
175        loadProfileInfo(json);
176
177        allowFetch = json.has(ALLOW_FETCH_TXT) ? json.get(ALLOW_FETCH_TXT).asBoolean() : true;
178        serialization = json.has(SERIALIZATION) ? Serialization.of(json.get(SERIALIZATION).asText())
179                                                : Serialization.OPTIONAL;
180
181        acceptedBagItVersions = arrayValues(json, ACCEPT_BAGIT_VERSION);
182        acceptedSerializations = arrayValues(json, ACCEPT_SERIALIZATION);
183
184        tagFilesAllowed = arrayValues(json, TAG_FILES_ALLOWED);
185        tagFilesRequired = arrayValues(json, TAG_FILES_REQUIRED);
186
187        allowedPayloadAlgorithms = arrayValues(json, MANIFESTS_ALLOWED);
188        allowedTagAlgorithms = arrayValues(json, TAG_MANIFESTS_ALLOWED);
189
190        payloadDigestAlgorithms = arrayValues(json, MANIFESTS_REQUIRED);
191        tagDigestAlgorithms = arrayValues(json, TAG_MANIFESTS_REQUIRED);
192
193        metadataFields.put(BAG_INFO.toLowerCase(), metadataFields(json.get(BAG_INFO)));
194
195        if (json.get(OTHER_INFO) != null) {
196            loadOtherTags(json);
197        }
198    }
199
200    private void loadProfileInfo(final JsonNode json) {
201        final JsonNode tag = json.get(BAGIT_PROFILE_INFO);
202        if (tag != null) {
203            tag.fields().forEachRemaining(entry -> profileMetadata.put(entry.getKey(), entry.getValue().asText()));
204        }
205    }
206
207    private void loadOtherTags(final JsonNode json) {
208        final JsonNode arrayTags = json.get(OTHER_INFO);
209        if (arrayTags != null && arrayTags.isArray()) {
210            final Iterator<JsonNode> arrayEntries = arrayTags.elements();
211            while (arrayEntries.hasNext()) {
212                final JsonNode entries = arrayEntries.next();
213                final Iterator<Map.Entry<String, JsonNode>> fields = entries.fields();
214                while (fields.hasNext()) {
215                    final Map.Entry<String, JsonNode> entry = fields.next();
216                    final String tagName = entry.getKey().toLowerCase();
217                    metadataFields.put(tagName, metadataFields(entry.getValue()));
218                }
219            }
220        }
221        logger.debug("metadataFields is {}", metadataFields);
222    }
223
224    private static Set<String> arrayValues(final JsonNode json, final String key) {
225        final JsonNode values = json.get(key);
226
227        if (values == null) {
228            return Collections.emptySet();
229        }
230
231        final Set<String> results = new HashSet<>();
232        for (int i = 0; i < values.size(); i++) {
233            results.add(values.get(i).asText());
234        }
235        return results;
236    }
237
238    /**
239     * Loads required tags and allowed values
240     *
241     * @param json json to parse
242     * @return map of tags => set of allowed values
243     */
244    private static Map<String, ProfileFieldRule> metadataFields(final JsonNode json) {
245        if (json == null) {
246            return Collections.emptyMap();
247        }
248
249        final Map<String, ProfileFieldRule> results = new HashMap<>();
250        // why not use the entry to iterate?
251        for (final Iterator<String> it = json.fieldNames(); it.hasNext(); ) {
252            // fields to pass to the ProfileFieldRule constructor
253            boolean required = false;
254            boolean repeatable = true;
255            boolean recommended = false;
256            String description = "No description";
257
258            final String name = it.next();
259            final JsonNode field = json.get(name);
260
261            // read each of the fields for the ProfileFieldRule:
262            // required, repeated, recommended, description, and values
263            final JsonNode requiredNode = field.get("required");
264            if (requiredNode != null && requiredNode.asBoolean()) {
265                required = requiredNode.asBoolean();
266            }
267
268            final JsonNode repeatedNode = field.get("repeatable");
269            if (repeatedNode != null) {
270                repeatable = repeatedNode.asBoolean();
271            }
272
273            final JsonNode recommendedNode = field.get("recommended");
274            if (recommendedNode != null && recommendedNode.asBoolean()) {
275                recommended = recommendedNode.asBoolean();
276            }
277
278            final JsonNode descriptionNode = field.get("description");
279            if (descriptionNode != null && !descriptionNode.asText().isEmpty()) {
280                description = descriptionNode.asText();
281            }
282
283            final Set<String> values = arrayValues(field, "values");
284
285            results.put(name, new ProfileFieldRule(required, repeatable, recommended, description, values));
286        }
287
288        return results;
289    }
290
291    /**
292     * Retrieve the BagIt-Profile-Identifier for this profile
293     *
294     * @return the BagIt-Profile-Identifier, or an empty string if none is found
295     */
296    public String getIdentifier() {
297        return profileMetadata.getOrDefault(BagProfileConstants.BAGIT_PROFILE_IDENTIFIER, "");
298    }
299
300    /**
301     * Boolean flag allowing a fetch.txt file
302     *
303     * @return true if fetch.txt is allowed, false otherwise
304     */
305    public boolean isAllowFetch() {
306        return allowFetch;
307    }
308
309    /**
310     * Get the support of serialization for a Bag.
311     *
312     * Allowed values are: forbidden, required, and optional
313     *
314     * @return String value of "forbidden", "required", or "optional"
315     */
316    public Serialization getSerialization() {
317        return serialization;
318    }
319
320    /**
321     * Get the supported BagIt versions
322     *
323     * @return Set of BagIt version numbers
324     */
325    public Set<String> getAcceptedBagItVersions() {
326        return acceptedBagItVersions;
327    }
328
329    /**
330     * Get the supported serialization formats
331     *
332     * If {@link BagProfile#getSerialization()} has a value of required or optional, at least one value is needed.
333     * If {@link BagProfile#getSerialization()} is forbidden, this has no meaning
334     *
335     * @return Set of serialization formats
336     */
337    public Set<String> getAcceptedSerializations() {
338        return acceptedSerializations;
339    }
340
341    /**
342     * Get the names of allowed tag files; supports unix style globbing
343     *
344     * All the tag files listed in {@link BagProfile#getTagFilesRequired()} must be in included in this
345     *
346     * @return Set of allowed tag files
347     */
348    public Set<String> getTagFilesAllowed() {
349        return tagFilesAllowed;
350    }
351
352    /**
353     * Get the tag files which are required to exist
354     *
355     * @return Set of tag filenames
356     */
357    public Set<String> getTagFilesRequired() {
358        return tagFilesRequired;
359    }
360
361    /**
362     * Get the payload algorithms which are allowed
363     *
364     * When specified along with {@link BagProfile#getPayloadDigestAlgorithms()}, this must include at least all of the
365     * manifest types listed in {@link BagProfile#getPayloadDigestAlgorithms()}.
366     *
367     * @return Set of digest algorithm names
368     */
369    public Set<String> getAllowedPayloadAlgorithms() {
370        return allowedPayloadAlgorithms;
371    }
372
373    /**
374     * Get the tag manifest algorithms which are allowed.
375     *
376     * When specified along with {@link BagProfile#getTagDigestAlgorithms()}, this must include at least all of the tag
377     * manifest types listed in {@link BagProfile#getTagDigestAlgorithms()}.
378     *
379     * @return Set of digest algorithm names
380     */
381    public Set<String> getAllowedTagAlgorithms() {
382        return allowedTagAlgorithms;
383    }
384
385    /**
386     * Get the required digest algorithms for payload manifests.
387     *
388     * @return Set of digest algorithm names
389     */
390    public Set<String> getPayloadDigestAlgorithms() {
391        return payloadDigestAlgorithms;
392    }
393
394    /**
395     * Get the required digest algorithms for tag manifests.
396     *
397     * @return Set of digest algorithm names
398     */
399    public Set<String> getTagDigestAlgorithms() {
400        return tagDigestAlgorithms;
401    }
402
403    /**
404     * Get the required Bag-Info metadata fields.
405     *
406     * @return A map of field names to a ProfileFieldRule containing acceptance criteria
407     */
408    public Map<String, ProfileFieldRule> getMetadataFields() {
409        return getMetadataFields(BAG_INFO);
410    }
411
412    /**
413     * Get the required tags for the extra tag file
414     *
415     * @param tagFile the tag file to get tags for
416     * @return map of tag = set of acceptable values, or null if tagFile doesn't exist
417     */
418    public Map<String, ProfileFieldRule> getMetadataFields(final String tagFile) {
419        return metadataFields.get(tagFile.toLowerCase());
420    }
421
422    /**
423     * Get all the section names in this profile, which can be used with getMetadataFields().
424     *
425     * @return set of section names
426     */
427    public Set<String> getSectionNames() {
428        return metadataFields.keySet();
429    }
430
431    /**
432     * Get the BagIt-Profile-Info section describing the BagIt Profile
433     *
434     * @return map of fields names to text descriptions
435     */
436    public Map<String, String> getProfileMetadata() {
437        return profileMetadata;
438    }
439
440    /**
441     * Validate a given BagConfig against the current profile
442     *
443     * @param config the BagConfig
444     */
445    public void validateConfig(final BagConfig config) {
446        checkRequiredTagsExist(config.getTagFiles());
447        for (final String section : config.getTagFiles()) {
448            validateTag(section, config.getFieldsForTagFile(section));
449        }
450    }
451
452    /**
453     * Validate a configuration for tag files based on a mapping of BagIt tag filenames to key-value pairs.
454     *
455     * e.g. the filename "bag-info.txt" could contain the pairs "Source-Organization: DuraSpace" and
456     * "Organization-Address: The Cloud"
457     *
458     * @param config the Map containing the configuration of BagIt tag files
459     */
460    public void validateTagFiles(final Map<String, Map<String, String>> config) {
461        checkRequiredTagsExist(config.keySet());
462        config.forEach(this::validateTag);
463    }
464
465    /**
466     * Test that all required tag files exist
467     *
468     * @param tags the name of each tag file to check
469     */
470    private void checkRequiredTagsExist(final Set<String> tags) {
471        for (String section : metadataFields.keySet()) {
472            final String expected = section + BAGIT_TAG_SUFFIX;
473            if (!tags.contains(expected)) {
474                throw new RuntimeException("Missing configuration for required tag file " + expected);
475            }
476        }
477    }
478
479    /**
480     * Validate a Mapping of key value pairs for a tag file
481     *
482     * @param filename the name of the tag file to validate
483     * @param fields A mapping of tag file names and their fields to validate
484     */
485    private void validateTag(final String filename, final Map<String, String> fields) {
486        // strip the trailing file extension
487        final String section = getSection(filename);
488        logger.debug("Checking validation for {}", section);
489        if (metadataFields.containsKey(section)) {
490            try {
491                ProfileValidationUtil.validate(section, getMetadataFields(section), fields);
492                ProfileValidationUtil.validateTagIsAllowed(Paths.get(filename), tagFilesAllowed);
493            } catch (ProfileValidationException e) {
494                throw new RuntimeException(e.getMessage(), e);
495            }
496        }
497    }
498
499    /**
500     * Normalize a filename to be what we expect is held in the MetadataFields key set
501     *
502     * @param filename the filename to normalize
503     * @return the filename without a tag extension, so that it can be used with the metadataFields
504     */
505    private String getSection(final String filename) {
506        // use two regexps
507        // the main pattern: two groups - a wildcard matcher for the filename and the tag suffix
508        // the replacement: just the first capture group
509        final String replacement = "$1";
510        final Pattern tagEnding = Pattern.compile("(.*)(\\" + BAGIT_TAG_SUFFIX + ")");
511        final Matcher matcher = tagEnding.matcher(filename.toLowerCase());
512        return matcher.replaceAll(replacement);
513    }
514
515    /**
516     * Validate a given {@link Bag} against the current profile
517     *
518     * @param bag the Bag
519     */
520    public void validateBag(final Bag bag) {
521        logger.info("Starting Bag to BagProfile conformance validator");
522
523        final String tagIdentifier = "tag";
524        final String fetchIdentifier = "fetch.txt";
525        final String payloadIdentifier = "payload";
526        final StringBuilder errors = new StringBuilder();
527
528        final Path root = bag.getRootDir();
529        final Set<Manifest> foundPayloadManifests = bag.getPayLoadManifests();
530        final Set<Manifest> foundTagManifests = bag.getTagManifests();
531
532        // check fetch rule
533        if (!allowFetch && (!bag.getItemsToFetch().isEmpty() || Files.exists(root.resolve(fetchIdentifier)))) {
534            errors.append("Profile does not allow a fetch.txt but fetch file found!\n");
535        }
536
537        // check payload manifest algorithms
538        errors.append(ProfileValidationUtil.validateManifest(foundPayloadManifests, payloadDigestAlgorithms,
539                                                             allowedPayloadAlgorithms, payloadIdentifier));
540
541        // check tag manifest rules files allowed
542        // the reporting can be redundant if no tag manifests are found, so only check the allowed algorithms and
543        // tag files IF we have at least one tag manifest
544        if (foundTagManifests.isEmpty()) {
545            errors.append("No tag manifest found!\n");
546        } else {
547            errors.append(ProfileValidationUtil.validateManifest(foundTagManifests, tagDigestAlgorithms,
548                                                                 allowedTagAlgorithms, tagIdentifier));
549
550            // grab the first tag manifest and use that to check all registered tag files
551            final Manifest manifest = foundTagManifests.iterator().next();
552            final Set<Path> existingTagFiles = manifest.getFileToChecksumMap().keySet();
553
554            for (Path tag : existingTagFiles) {
555                final Path relativePath = tag.startsWith(root) ? root.relativize(tag) : tag;
556                try {
557                    ProfileValidationUtil.validateTagIsAllowed(relativePath, tagFilesAllowed);
558                } catch (ProfileValidationException e) {
559                    errors.append(e.getMessage());
560                }
561            }
562        }
563
564        // check all required tag files exist
565        for (String tagName : tagFilesRequired) {
566            final Path requiredTag = root.resolve(tagName);
567            if (!requiredTag.toFile().exists()) {
568                errors.append("Required tag file \"").append(tagName).append("\" does not exist!\n");
569            }
570        }
571
572        // check *-info required fields
573        for (String section : metadataFields.keySet()) {
574            final String tagFile = section.toLowerCase() + BAGIT_TAG_SUFFIX;
575            final Path resolved = root.resolve(tagFile);
576            try {
577                ProfileValidationUtil.validate(section, metadataFields.get(section), resolved);
578            } catch (IOException e) {
579                // error - could not read info
580                errors.append("Could not read info from \"").append(tagFile).append("\"!\n");
581            } catch (ProfileValidationException e) {
582                errors.append(e.getMessage());
583            }
584        }
585
586        // check allowed bagit versions
587        if (!acceptedBagItVersions.contains(bag.getVersion().toString())) {
588            errors.append("BagIt version incompatible; accepted versions are ")
589                  .append(acceptedBagItVersions)
590                  .append("\n");
591        }
592
593        // serialization seems unnecessary as the import export tool does not support importing serialized bags
594        if (serialization == Serialization.REQUIRED) {
595            logger.warn("Bag Profile requires serialization, import will continue if the bag has been deserialized");
596        }
597
598        // finally, if we have any errors throw an exception
599        if (errors.length() > 0) {
600            throw new RuntimeException("Bag profile validation failure: The following errors occurred: \n" +
601                                       errors.toString());
602        }
603    }
604}