001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
007import static org.slf4j.LoggerFactory.getLogger;
008
009import java.io.IOException;
010import java.nio.file.FileSystems;
011import java.nio.file.Files;
012import java.nio.file.Path;
013import java.nio.file.PathMatcher;
014import java.util.Arrays;
015import java.util.Collections;
016import java.util.HashMap;
017import java.util.HashSet;
018import java.util.Map;
019import java.util.Set;
020import java.util.concurrent.atomic.AtomicReference;
021import java.util.regex.Pattern;
022import java.util.stream.Stream;
023
024import gov.loc.repository.bagit.domain.Manifest;
025import org.slf4j.Logger;
026
027/**
028 * Utility methods for validating profiles.
029 *
030 * @author mikejritter
031 * @author dbernstein
032 * @since Dec 14, 2016
033 */
034public class ProfileValidationUtil {
035
036    private static final Logger logger = getLogger(ProfileValidationUtil.class);
037
038    /*
039     * System generated Bag Info that should be ignored by validator.
040     */
041    protected static final Set<String> SYSTEM_GENERATED_FIELD_NAMES =
042            new HashSet<>(Arrays.asList("Bagging-Date",
043                                        "Bag-Size",
044                                        "Bag-Count",
045                                        "Payload-Oxum",
046                                        "BagIt-Profile-Identifier"));
047
048    private ProfileValidationUtil() {
049    }
050
051    /**
052     * Validates a {@code tag} file against a set of {@code requiredFields} and their constrained values.
053     *
054     * @param profileSection describes the section of the profile that is being validated.
055     * @param requiredFields the required fields and associated rule
056     * @param tag the path to the info file to read
057     * @throws IOException when the {@code tag} file cannot be read
058     * @throws ProfileValidationException when the fields do not pass muster. The exception message contains a
059     *         description of all validation errors found.
060     */
061    public static void validate(final String profileSection, final Map<String, ProfileFieldRule> requiredFields,
062                                final Path tag) throws ProfileValidationException, IOException {
063        final Map<String, String> fields = readInfo(tag);
064        validate(profileSection, requiredFields, fields, Collections.emptySet());
065    }
066
067    /**
068     * Validates the {@code fields} against a set of {@code requiredFields} and their constrained values. This is
069     * intended to be used in conjunction with {@link BagConfig} and will filter on certain fields defined by the
070     * {@code SYSTEM_GENERATED_FIELD_NAMES} constant.
071     *
072     * @param profileSection describes the section of the profile that is being validated.
073     * @param requiredFields the required fields and associated rule
074     * @param fields the key value pairs to be validated
075     * @throws ProfileValidationException when the fields do not pass muster. The exception message contains a
076     *         description of all validation errors found.
077     */
078    public static void validate(final String profileSection, final Map<String, ProfileFieldRule> requiredFields,
079                                final Map<String, String> fields) throws ProfileValidationException {
080        validate(profileSection, requiredFields, fields, SYSTEM_GENERATED_FIELD_NAMES);
081    }
082
083    /**
084     * Validates the fields against the set of required fields and their constrained values.
085     *
086     * @param profileSection describes the section of the profile that is being validated.
087     * @param requiredFields the required fields and any allowable values (if constrained).
088     * @param fields The key value pairs to be validated.
089     * @param filter A set of fields to ignore when validating. Useful for export.
090     * @throws ProfileValidationException when the fields do not pass muster. The exception message contains a
091     *         description of all validation errors found.
092     */
093    private static void validate(final String profileSection, final Map<String, ProfileFieldRule> requiredFields,
094                                final Map<String, String> fields, final Set<String> filter)
095        throws ProfileValidationException {
096
097        if (requiredFields != null) {
098            final StringBuilder errors = new StringBuilder();
099
100            for (String requiredField : requiredFields.keySet()) {
101                // ignore validation on system generated fields
102                if (filter.contains(requiredField)) {
103                    logger.debug("skipping system generated field {}...", requiredField);
104                    continue;
105                }
106
107                final ProfileFieldRule rule = requiredFields.get(requiredField);
108                if (fields.containsKey(requiredField)) {
109                    final String value = fields.get(requiredField);
110                    final Set<String> validValues = rule.getValues();
111                    if (validValues != null && !validValues.isEmpty()) {
112                        if (!validValues.contains(value)) {
113                            final String invalidMessage = "\"%s\" is not valid for \"%s\". Valid values: %s\n";
114                            errors.append(String.format(invalidMessage, value, requiredField, validValues));
115                        }
116                    }
117                } else if (rule.isRequired()) {
118                    errors.append("\"" + requiredField + "\" is a required field.\n");
119                } else if (rule.isRecommended()) {
120                    logger.warn("{} does not contain the recommended field {}", profileSection, requiredField);
121                }
122            }
123
124            if (errors.length() > 0) {
125                throw new ProfileValidationException(
126                        "Bag profile validation failure: The following errors occurred in the " +
127                                profileSection + ":\n" + errors.toString());
128            }
129        }
130
131    }
132
133    /**
134     * Validate that a {@code manifests} found in a {@link gov.loc.repository.bagit.domain.Bag} are allowed according to
135     * both the {@code required} and {@code allowed} sets from a {@link BagProfile}.
136     *
137     * @param manifests the manifests found in a {@link gov.loc.repository.bagit.domain.Bag}
138     * @param required the set of required manifest algorithms
139     * @param allowed the set of allowed manifest algorithms
140     * @param type the type of manifest being processed, normally 'tag' or 'payload'
141     * @return A String with any validation errors associated with the {@code manifests}
142     */
143    public static String validateManifest(final Set<Manifest> manifests, final Set<String> required,
144                                          final Set<String> allowed, final String type) {
145        final String missing = "Missing %s manifest algorithm: %s\n";
146        final String unsupported = "Unsupported %s manifest algorithm: %s\n";
147        final StringBuilder errors = new StringBuilder();
148
149        // make a copy so we do not mutate the BagProfile
150        final Set<String> requiredCopy = new HashSet<>(required);
151
152        for (final Manifest manifest : manifests) {
153            final String algorithm = manifest.getAlgorithm().getBagitName();
154            requiredCopy.remove(algorithm);
155
156            if (!allowed.isEmpty() && !allowed.contains(algorithm)) {
157                errors.append(String.format(unsupported, type, algorithm));
158            }
159        }
160
161        if (!requiredCopy.isEmpty()) {
162            errors.append(String.format(missing, type, required));
163        }
164
165        return errors.toString();
166    }
167
168    /**
169     * Check if a given tag file is part of the allowed tags. Should not be used against non-tag files such as the
170     * manifests or bagit.txt.
171     *
172     * @param tag the tag file to check
173     * @param allowedTags the list of allowed tag files, with unix style globbing allowed
174     * @throws ProfileValidationException when a tag file is not in the set of allowed tag filenames
175     */
176    public static void validateTagIsAllowed(final Path tag, final Set<String> allowedTags)
177        throws ProfileValidationException {
178        if (tag != null && allowedTags != null && !allowedTags.isEmpty()) {
179            // sanity check against required BagIt files
180            final String systemFiles = "bagit\\.txt|bag-info\\.txt|manifest-.*|tagmanifest-.*";
181            if (Pattern.matches(systemFiles, tag.toString())) {
182                logger.debug("Tag validator used against required file {}; ignoring", tag);
183                return;
184            }
185
186            boolean match = false;
187            for (String allowedTag : allowedTags) {
188                final PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + allowedTag);
189
190                if (matcher.matches(tag)) {
191                    match = true;
192                    break;
193                }
194            }
195
196            if (!match) {
197                throw new ProfileValidationException("Bag profile validation failure: tag " + tag +
198                                                     " is not allowed. List of allowed tag files are " +
199                                                     allowedTags);
200            }
201        }
202    }
203
204    /**
205     * Read an info file (bag-info.txt, aptrust-info.txt, etc)
206     *
207     * @param info the {@link Path} to the info file to read
208     * @return a mapping of keys to values read from the info file
209     * @throws IOException if a file cannot be read
210     */
211    private static Map<String, String> readInfo(final Path info) throws IOException {
212        logger.debug("Trying to read info file {}", info);
213        final Map<String, String> data = new HashMap<>();
214        final AtomicReference<String> previousKey = new AtomicReference<>("");
215
216        // if a line starts indented, it is part of the previous key so we track what key we're working on
217        try (Stream<String> lines = Files.lines(info)) {
218            lines.forEach(line -> {
219                if (line.matches("^\\s+")) {
220                    data.merge(previousKey.get(), line, String::concat);
221                } else {
222                    final String[] split = line.split(":");
223                    final String key = split[0].trim();
224                    final String value = split[1].trim();
225                    previousKey.set(key);
226                    data.put(key, value);
227                }
228            });
229        }
230
231        return data;
232    }
233
234}