001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
007import static org.duraspace.bagit.BagProfileConstants.ACCEPT_BAGIT_VERSION;
008import static org.duraspace.bagit.BagProfileConstants.ACCEPT_SERIALIZATION;
009import static org.duraspace.bagit.BagProfileConstants.ALLOW_FETCH_TXT;
010import static org.duraspace.bagit.BagProfileConstants.BAGIT_PROFILE_INFO;
011import static org.duraspace.bagit.BagProfileConstants.BAG_INFO;
012import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_ALLOWED;
013import static org.duraspace.bagit.BagProfileConstants.MANIFESTS_REQUIRED;
014import static org.duraspace.bagit.BagProfileConstants.OTHER_INFO;
015import static org.duraspace.bagit.BagProfileConstants.SERIALIZATION;
016import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_ALLOWED;
017import static org.duraspace.bagit.BagProfileConstants.TAG_FILES_REQUIRED;
018import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_ALLOWED;
019import static org.duraspace.bagit.BagProfileConstants.TAG_MANIFESTS_REQUIRED;
020import static org.slf4j.LoggerFactory.getLogger;
021
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
037
038import org.slf4j.Logger;
039
040import com.fasterxml.jackson.databind.JsonNode;
041import com.fasterxml.jackson.databind.ObjectMapper;
042import gov.loc.repository.bagit.domain.Bag;
043import gov.loc.repository.bagit.domain.Manifest;
044
045/**
046 * A BagProfile contains the entire contents of a BagIt profile specified through the profile's json.
047 *
048 * @author mikejritter
049 * @author escowles
050 * @since 2016-12-12
051 */
052public class BagProfile {
053
054    public enum Serialization {
055        FORBIDDEN, REQUIRED, OPTIONAL, UNKNOWN;
056
057        /**
058         * Retrieve the {@link Serialization} from a string representation
059         *
060         * @param value the String value to use
061         * @return the {@link Serialization} the {@code value} is equal to
062         */
063        public static Serialization of(final String value) {
064            switch (value.toLowerCase()) {
065                case "forbidden": return FORBIDDEN;
066                case "required": return REQUIRED;
067                case "optional": return OPTIONAL;
068                default: return UNKNOWN;
069            }
070        }
071    }
072
073    /**
074     * Enum of the built in profiles which are provided with bagit-support
075     */
076    public enum BuiltIn {
077        APTRUST("aptrust"),
078        BEYOND_THE_REPOSITORY("beyondtherepository"),
079        DEFAULT("default"),
080        METAARCHIVE("metaarchive"),
081        PERSEIDS("perseids");
082
083        private final String identifier;
084
085        /**
086         * Default constructor
087         *
088         * @param identifier the identifier of the profile
089         */
090        BuiltIn(final String identifier) {
091            this.identifier = identifier;
092        }
093
094        /**
095         * Retrieve a built in profile from an identifier
096         *
097         * @param identifier the identifier to retrieve a profile for
098         * @return the {@link BuiltIn} profile
099         * @throws IllegalArgumentException if the {@code identifier} is not supported
100         */
101        public static BuiltIn from(final String identifier) {
102            switch (identifier.toLowerCase()) {
103                case "aptrust": return APTRUST;
104                case "beyondtherepository": return BEYOND_THE_REPOSITORY;
105                case "default": return DEFAULT;
106                case "metaarchive": return METAARCHIVE;
107                case "perseids": return PERSEIDS;
108                default: throw new IllegalArgumentException("Unsupported profile identifier. Accepted values are: " +
109                                                            Arrays.stream(BuiltIn.values())
110                                                                  .map(BuiltIn::getIdentifier)
111                                                                  .collect(Collectors.joining(", ")));
112            }
113        }
114
115        /**
116         * Get the identifier associated with the profile
117         *
118         * @return the identifier
119         */
120        public String getIdentifier() {
121            return identifier;
122        }
123    }
124
125    private static final Logger logger = getLogger(BagProfile.class);
126
127    private boolean allowFetch;
128    private Serialization serialization;
129
130    private Set<String> acceptedBagItVersions;
131    private Set<String> acceptedSerializations;
132
133    private Set<String> tagFilesAllowed;
134    private Set<String> tagFilesRequired;
135
136    private Set<String> allowedPayloadAlgorithms;
137    private Set<String> allowedTagAlgorithms;
138
139    private Set<String> payloadDigestAlgorithms;
140    private Set<String> tagDigestAlgorithms;
141
142    private Set<String> sections = new HashSet<>();
143    private Map<String, Map<String, ProfileFieldRule>> metadataFields = new HashMap<>();
144    private Map<String, String> profileMetadata = new HashMap<>();
145
146    /**
147     * Load a BagProfile from a {@link BuiltIn} profile type
148     *
149     * @param builtInProfile the supported profile to load
150     * @throws IOException if there is an error reading the json
151     */
152    public BagProfile(final BuiltIn builtInProfile) throws IOException {
153        final String resource = "profiles/" + builtInProfile.identifier + ".json";
154        final URL resourceURL = this.getClass().getClassLoader().getResource(resource);
155        try (InputStream in = Objects.requireNonNull(resourceURL).openStream()) {
156            load(in);
157        }
158    }
159
160    /**
161     * Create a BagProfile from a given InputStream
162     *
163     * @param in InputStream containing the Bag profile JSON document
164     * @throws IOException when there is an I/O error reading JSON
165     */
166    public BagProfile(final InputStream in) throws IOException {
167        load(in);
168    }
169
170    private void load(final InputStream in) throws IOException {
171        final ObjectMapper mapper = new ObjectMapper();
172        final JsonNode json = mapper.readTree(in);
173
174        loadProfileInfo(json);
175
176        allowFetch = json.has(ALLOW_FETCH_TXT) ? json.get(ALLOW_FETCH_TXT).asBoolean() : true;
177        serialization = json.has(SERIALIZATION) ? Serialization.of(json.get(SERIALIZATION).asText())
178                                                : Serialization.OPTIONAL;
179
180        acceptedBagItVersions = arrayValues(json, ACCEPT_BAGIT_VERSION);
181        acceptedSerializations = arrayValues(json, ACCEPT_SERIALIZATION);
182
183        tagFilesAllowed = arrayValues(json, TAG_FILES_ALLOWED);
184        tagFilesRequired = arrayValues(json, TAG_FILES_REQUIRED);
185
186        allowedPayloadAlgorithms = arrayValues(json, MANIFESTS_ALLOWED);
187        allowedTagAlgorithms = arrayValues(json, TAG_MANIFESTS_ALLOWED);
188
189        payloadDigestAlgorithms = arrayValues(json, MANIFESTS_REQUIRED);
190        tagDigestAlgorithms = arrayValues(json, TAG_MANIFESTS_REQUIRED);
191
192        metadataFields.put(BAG_INFO, metadataFields(json, BAG_INFO));
193        sections.add(BAG_INFO);
194
195        if (json.get(OTHER_INFO) != null) {
196            loadOtherTags(json);
197        }
198    }
199
200    private void loadProfileInfo(final JsonNode json) {
201        final JsonNode tag = json.get(BAGIT_PROFILE_INFO);
202        if (tag != null) {
203            tag.fields().forEachRemaining(entry -> profileMetadata.put(entry.getKey(), entry.getValue().asText()));
204        }
205    }
206
207    private void loadOtherTags(final JsonNode json) {
208        final JsonNode arrayTags = json.get(OTHER_INFO);
209        if (arrayTags != null && arrayTags.isArray()) {
210            arrayTags.forEach(tag -> tag.fieldNames().forEachRemaining(sections::add));
211            final Iterator<JsonNode> arrayEntries = arrayTags.elements();
212            while (arrayEntries.hasNext()) {
213                final JsonNode entries = arrayEntries.next();
214                final Iterator<String> tagNames = entries.fieldNames();
215                while (tagNames.hasNext()) {
216                    final String tagName = tagNames.next();
217                    metadataFields.put(tagName, metadataFields(entries, tagName));
218                }
219            }
220        }
221        logger.debug("tagFiles is {}", sections);
222        logger.debug("metadataFields is {}", metadataFields);
223    }
224
225    private static Set<String> arrayValues(final JsonNode json, final String key) {
226        final JsonNode values = json.get(key);
227
228        if (values == null) {
229            return Collections.emptySet();
230        }
231
232        final Set<String> results = new HashSet<>();
233        for (int i = 0; i < values.size(); i++) {
234            results.add(values.get(i).asText());
235        }
236        return results;
237    }
238
239    /**
240     * Loads required tags and allowed values
241     *
242     * @param json json to parse
243     * @param key key in json to load tags from
244     * @return map of tags => set of allowed values
245     */
246    private static Map<String, ProfileFieldRule> metadataFields(final JsonNode json, final String key) {
247        final JsonNode fields = json.get(key);
248
249        if (fields == null) {
250            return Collections.emptyMap();
251        }
252
253        final Map<String, ProfileFieldRule> results = new HashMap<>();
254        for (final Iterator<String> it = fields.fieldNames(); it.hasNext(); ) {
255            // fields to pass to the ProfileFieldRule constructor
256            boolean required = false;
257            boolean repeatable = true;
258            boolean recommended = false;
259            String description = "No description";
260
261            final String name = it.next();
262            final JsonNode field = fields.get(name);
263
264            // read each of the fields for the ProfileFieldRule:
265            // required, repeated, recommended, description, and values
266            final JsonNode requiredNode = field.get("required");
267            if (requiredNode != null && requiredNode.asBoolean()) {
268                required = requiredNode.asBoolean();
269            }
270
271            final JsonNode repeatedNode = field.get("repeatable");
272            if (repeatedNode != null) {
273                repeatable = repeatedNode.asBoolean();
274            }
275
276            final JsonNode recommendedNode = field.get("recommended");
277            if (recommendedNode != null && recommendedNode.asBoolean()) {
278                recommended = recommendedNode.asBoolean();
279            }
280
281            final JsonNode descriptionNode = field.get("description");
282            if (descriptionNode != null && descriptionNode.asText().isEmpty()) {
283                description = descriptionNode.asText();
284            }
285
286            final Set<String> values = arrayValues(field, "values");
287
288            results.put(name, new ProfileFieldRule(required, repeatable, recommended, description, values));
289        }
290
291        return results;
292    }
293
294    /**
295     * Boolean flag allowing a fetch.txt file
296     *
297     * @return true if fetch.txt is allowed, false otherwise
298     */
299    public boolean isAllowFetch() {
300        return allowFetch;
301    }
302
303    /**
304     * Get the support of serialization for a Bag.
305     *
306     * Allowed values are: forbidden, required, and optional
307     *
308     * @return String value of "forbidden", "required", or "optional"
309     */
310    public Serialization getSerialization() {
311        return serialization;
312    }
313
314    /**
315     * Get the supported BagIt versions
316     *
317     * @return Set of BagIt version numbers
318     */
319    public Set<String> getAcceptedBagItVersions() {
320        return acceptedBagItVersions;
321    }
322
323    /**
324     * Get the supported serialization formats
325     *
326     * If {@link BagProfile#getSerialization()} has a value of required or optional, at least one value is needed.
327     * If {@link BagProfile#getSerialization()} is forbidden, this has no meaning
328     *
329     * @return Set of serialization formats
330     */
331    public Set<String> getAcceptedSerializations() {
332        return acceptedSerializations;
333    }
334
335    /**
336     * Get the names of allowed tag files; supports unix style globbing
337     *
338     * All the tag files listed in {@link BagProfile#getTagFilesRequired()} must be in included in this
339     *
340     * @return Set of allowed tag files
341     */
342    public Set<String> getTagFilesAllowed() {
343        return tagFilesAllowed;
344    }
345
346    /**
347     * Get the tag files which are required to exist
348     *
349     * @return Set of tag filenames
350     */
351    public Set<String> getTagFilesRequired() {
352        return tagFilesRequired;
353    }
354
355    /**
356     * Get the payload algorithms which are allowed
357     *
358     * When specified along with {@link BagProfile#getPayloadDigestAlgorithms()}, this must include at least all of the
359     * manifest types listed in {@link BagProfile#getPayloadDigestAlgorithms()}.
360     *
361     * @return Set of digest algorithm names
362     */
363    public Set<String> getAllowedPayloadAlgorithms() {
364        return allowedPayloadAlgorithms;
365    }
366
367    /**
368     * Get the tag manifest algorithms which are allowed.
369     *
370     * When specified along with {@link BagProfile#getTagDigestAlgorithms()}, this must include at least all of the tag
371     * manifest types listed in {@link BagProfile#getTagDigestAlgorithms()}.
372     *
373     * @return Set of digest algorithm names
374     */
375    public Set<String> getAllowedTagAlgorithms() {
376        return allowedTagAlgorithms;
377    }
378
379    /**
380     * Get the required digest algorithms for payload manifests.
381     *
382     * @return Set of digest algorithm names
383     */
384    public Set<String> getPayloadDigestAlgorithms() {
385        return payloadDigestAlgorithms;
386    }
387
388    /**
389     * Get the required digest algorithms for tag manifests.
390     *
391     * @return Set of digest algorithm names
392     */
393    public Set<String> getTagDigestAlgorithms() {
394        return tagDigestAlgorithms;
395    }
396
397    /**
398     * Get the required Bag-Info metadata fields.
399     *
400     * @return A map of field names to a ProfileFieldRule containing acceptance criteria
401     */
402    public Map<String, ProfileFieldRule> getMetadataFields() {
403        return getMetadataFields(BAG_INFO);
404    }
405
406    /**
407     * Get the required tags for the extra tag file
408     *
409     * @param tagFile the tag file to get tags for
410     * @return map of tag = set of acceptable values, or null if tagFile doesn't exist
411     */
412    public Map<String, ProfileFieldRule> getMetadataFields(final String tagFile) {
413        return metadataFields.get(tagFile);
414    }
415
416    /**
417     * Get all the section names in this profile, which can be used with getMetadataFields().
418     *
419     * @return set of section names
420     */
421    public Set<String> getSectionNames() {
422        return sections;
423    }
424
425    /**
426     * Get the BagIt-Profile-Info section describing the BagIt Profile
427     *
428     * @return map of fields names to text descriptions
429     */
430    public Map<String, String> getProfileMetadata() {
431        return profileMetadata;
432    }
433
434    /**
435     * Validate a given BagConfig against the current profile
436     *
437     * @param config the BagConfig
438     */
439    public void validateConfig(final BagConfig config) {
440        for (final String section : sections) {
441            final String tagFile = section.toLowerCase() + ".txt";
442            if (config.hasTagFile(tagFile)) {
443                try {
444                    ProfileValidationUtil.validate(section, getMetadataFields(section),
445                                                   config.getFieldsForTagFile(tagFile));
446
447                    ProfileValidationUtil.validateTagIsAllowed(Paths.get(tagFile), tagFilesAllowed);
448                } catch (ProfileValidationException e) {
449                    throw new RuntimeException(e.getMessage(), e);
450                }
451            } else {
452                throw new RuntimeException(String.format("Error missing section %s from bag config", section));
453            }
454        }
455    }
456
457
458    /**
459     * Validate a given {@link Bag} against the current profile
460     *
461     * @param bag the Bag
462     */
463    public void validateBag(final Bag bag) {
464        logger.info("Starting Bag to BagProfile conformance validator");
465
466        final String tagIdentifier = "tag";
467        final String fetchIdentifier = "fetch.txt";
468        final String payloadIdentifier = "payload";
469        final StringBuilder errors = new StringBuilder();
470
471        final Path root = bag.getRootDir();
472        final Set<Manifest> foundPayloadManifests = bag.getPayLoadManifests();
473        final Set<Manifest> foundTagManifests = bag.getTagManifests();
474
475        // check fetch rule
476        if (!allowFetch && (!bag.getItemsToFetch().isEmpty() || Files.exists(root.resolve(fetchIdentifier)))) {
477            errors.append("Profile does not allow a fetch.txt but fetch file found!\n");
478        }
479
480        // check payload manifest algorithms
481        errors.append(ProfileValidationUtil.validateManifest(foundPayloadManifests, payloadDigestAlgorithms,
482                                            allowedPayloadAlgorithms, payloadIdentifier));
483
484        // check tag manifest rules files allowed
485        // the reporting can be redundant if no tag manifests are found, so only check the allowed algorithms and
486        // tag files IF we have at least one tag manifest
487        if (foundTagManifests.isEmpty()) {
488            errors.append("No tag manifest found!\n");
489        } else {
490            errors.append(ProfileValidationUtil.validateManifest(foundTagManifests, tagDigestAlgorithms,
491                                                allowedTagAlgorithms, tagIdentifier));
492
493            // grab the first tag manifest and use that to check all registered tag files
494            final Manifest manifest = foundTagManifests.iterator().next();
495            final Set<Path> existingTagFiles = manifest.getFileToChecksumMap().keySet();
496
497            for (Path tag : existingTagFiles) {
498                final Path relativePath = tag.startsWith(root) ? root.relativize(tag) : tag;
499                try {
500                    ProfileValidationUtil.validateTagIsAllowed(relativePath, tagFilesAllowed);
501                } catch (ProfileValidationException e) {
502                    errors.append(e.getMessage());
503                }
504            }
505        }
506
507        // check all required tag files exist
508        for (String tagName : tagFilesRequired) {
509            final Path requiredTag = root.resolve(tagName);
510            if (!requiredTag.toFile().exists()) {
511                errors.append("Required tag file \"").append(tagName).append("\" does not exist!\n");
512            }
513        }
514
515        // check *-info required fields
516        for (String section : sections) {
517            final String tagFile = section.toLowerCase() + ".txt";
518            final Path resolved = root.resolve(tagFile);
519            try {
520                ProfileValidationUtil.validate(section, metadataFields.get(section), resolved);
521            } catch (IOException e) {
522                // error - could not read info
523                errors.append("Could not read info from \"").append(tagFile).append("\"!\n");
524            } catch (ProfileValidationException e) {
525                errors.append(e.getMessage());
526            }
527        }
528
529        // check allowed bagit versions
530        if (!acceptedBagItVersions.contains(bag.getVersion().toString())) {
531            errors.append("BagIt version incompatible; accepted versions are ")
532                  .append(acceptedBagItVersions)
533                  .append("\n");
534        }
535
536        // serialization seems unnecessary as the import export tool does not support importing serialized bags
537        if (serialization == Serialization.REQUIRED) {
538            logger.warn("Bag Profile requires serialization, import will continue if the bag has been deserialized");
539        }
540
541        // finally, if we have any errors throw an exception
542        if (errors.length() > 0) {
543            throw new RuntimeException("Bag profile validation failure: The following errors occurred: \n" +
544                                       errors.toString());
545        }
546    }
547}