001/* 002 * The contents of this file are subject to the license and copyright detailed 003 * in the LICENSE and NOTICE files at the root of the source tree. 004 */ 005package org.duraspace.bagit; 006 007import static org.slf4j.LoggerFactory.getLogger; 008 009import java.io.IOException; 010import java.nio.file.FileSystems; 011import java.nio.file.Files; 012import java.nio.file.Path; 013import java.nio.file.PathMatcher; 014import java.util.Arrays; 015import java.util.Collections; 016import java.util.HashMap; 017import java.util.HashSet; 018import java.util.Map; 019import java.util.Set; 020import java.util.concurrent.atomic.AtomicReference; 021import java.util.regex.Pattern; 022import java.util.stream.Stream; 023 024import gov.loc.repository.bagit.domain.Manifest; 025import org.slf4j.Logger; 026 027/** 028 * Utility methods for validating profiles. 029 * 030 * @author mikejritter 031 * @author dbernstein 032 * @since Dec 14, 2016 033 */ 034public class ProfileValidationUtil { 035 036 private static final Logger logger = getLogger(ProfileValidationUtil.class); 037 038 /* 039 * System generated Bag Info that should be ignored by validator. 040 */ 041 protected static final Set<String> SYSTEM_GENERATED_FIELD_NAMES = 042 new HashSet<>(Arrays.asList("Bagging-Date", 043 "Bag-Size", 044 "Bag-Count", 045 "Payload-Oxum", 046 "BagIt-Profile-Identifier")); 047 048 private ProfileValidationUtil() { 049 } 050 051 /** 052 * Validates a {@code tag} file against a set of {@code requiredFields} and their constrained values. 053 * 054 * @param profileSection describes the section of the profile that is being validated. 055 * @param requiredFields the required fields and associated rule 056 * @param tag the path to the info file to read 057 * @throws IOException when the {@code tag} file cannot be read 058 * @throws ProfileValidationException when the fields do not pass muster. The exception message contains a 059 * description of all validation errors found. 060 */ 061 public static void validate(final String profileSection, final Map<String, ProfileFieldRule> requiredFields, 062 final Path tag) throws ProfileValidationException, IOException { 063 final Map<String, String> fields = readInfo(tag); 064 validate(profileSection, requiredFields, fields, Collections.emptySet()); 065 } 066 067 /** 068 * Validates the {@code fields} against a set of {@code requiredFields} and their constrained values. This is 069 * intended to be used in conjunction with {@link BagConfig} and will filter on certain fields defined by the 070 * {@code SYSTEM_GENERATED_FIELD_NAMES} constant. 071 * 072 * @param profileSection describes the section of the profile that is being validated. 073 * @param requiredFields the required fields and associated rule 074 * @param fields the key value pairs to be validated 075 * @throws ProfileValidationException when the fields do not pass muster. The exception message contains a 076 * description of all validation errors found. 077 */ 078 public static void validate(final String profileSection, final Map<String, ProfileFieldRule> requiredFields, 079 final Map<String, String> fields) throws ProfileValidationException { 080 validate(profileSection, requiredFields, fields, SYSTEM_GENERATED_FIELD_NAMES); 081 } 082 083 /** 084 * Validates the fields against the set of required fields and their constrained values. 085 * 086 * @param profileSection describes the section of the profile that is being validated. 087 * @param requiredFields the required fields and any allowable values (if constrained). 088 * @param fields The key value pairs to be validated. 089 * @param filter A set of fields to ignore when validating. Useful for export. 090 * @throws ProfileValidationException when the fields do not pass muster. The exception message contains a 091 * description of all validation errors found. 092 */ 093 private static void validate(final String profileSection, final Map<String, ProfileFieldRule> requiredFields, 094 final Map<String, String> fields, final Set<String> filter) 095 throws ProfileValidationException { 096 097 if (requiredFields != null) { 098 final StringBuilder errors = new StringBuilder(); 099 100 for (String requiredField : requiredFields.keySet()) { 101 // ignore validation on system generated fields 102 if (filter.contains(requiredField)) { 103 logger.debug("skipping system generated field {}...", requiredField); 104 continue; 105 } 106 107 final ProfileFieldRule rule = requiredFields.get(requiredField); 108 if (fields.containsKey(requiredField)) { 109 final String value = fields.get(requiredField); 110 final Set<String> validValues = rule.getValues(); 111 if (validValues != null && !validValues.isEmpty()) { 112 if (!validValues.contains(value)) { 113 final String invalidMessage = "\"%s\" is not valid for \"%s\". Valid values: %s\n"; 114 errors.append(String.format(invalidMessage, value, requiredField, validValues)); 115 } 116 } 117 } else if (rule.isRequired()) { 118 errors.append("\"" + requiredField + "\" is a required field.\n"); 119 } else if (rule.isRecommended()) { 120 logger.warn("{} does not contain the recommended field {}", profileSection, requiredField); 121 } 122 } 123 124 if (errors.length() > 0) { 125 throw new ProfileValidationException( 126 "Bag profile validation failure: The following errors occurred in the " + 127 profileSection + ":\n" + errors.toString()); 128 } 129 } 130 131 } 132 133 /** 134 * Validate that a {@code manifests} found in a {@link gov.loc.repository.bagit.domain.Bag} are allowed according to 135 * both the {@code required} and {@code allowed} sets from a {@link BagProfile}. 136 * 137 * @param manifests the manifests found in a {@link gov.loc.repository.bagit.domain.Bag} 138 * @param required the set of required manifest algorithms 139 * @param allowed the set of allowed manifest algorithms 140 * @param type the type of manifest being processed, normally 'tag' or 'payload' 141 * @return A String with any validation errors associated with the {@code manifests} 142 */ 143 public static String validateManifest(final Set<Manifest> manifests, final Set<String> required, 144 final Set<String> allowed, final String type) { 145 final String missing = "Missing %s manifest algorithm: %s\n"; 146 final String unsupported = "Unsupported %s manifest algorithm: %s\n"; 147 final StringBuilder errors = new StringBuilder(); 148 149 // make a copy so we do not mutate the BagProfile 150 final Set<String> requiredCopy = new HashSet<>(required); 151 152 for (final Manifest manifest : manifests) { 153 final String algorithm = manifest.getAlgorithm().getBagitName(); 154 requiredCopy.remove(algorithm); 155 156 if (!allowed.isEmpty() && !allowed.contains(algorithm)) { 157 errors.append(String.format(unsupported, type, algorithm)); 158 } 159 } 160 161 if (!requiredCopy.isEmpty()) { 162 errors.append(String.format(missing, type, required)); 163 } 164 165 return errors.toString(); 166 } 167 168 /** 169 * Check if a given tag file is part of the allowed tags. Should not be used against non-tag files such as the 170 * manifests or bagit.txt. 171 * 172 * @param tag the tag file to check 173 * @param allowedTags the list of allowed tag files, with unix style globbing allowed 174 * @throws ProfileValidationException when a tag file is not in the set of allowed tag filenames 175 */ 176 public static void validateTagIsAllowed(final Path tag, final Set<String> allowedTags) 177 throws ProfileValidationException { 178 if (tag != null && allowedTags != null && !allowedTags.isEmpty()) { 179 // sanity check against required BagIt files 180 final String systemFiles = "bagit\\.txt|bag-info\\.txt|manifest-.*|tagmanifest-.*"; 181 if (Pattern.matches(systemFiles, tag.toString())) { 182 logger.debug("Tag validator used against required file {}; ignoring", tag); 183 return; 184 } 185 186 boolean match = false; 187 for (String allowedTag : allowedTags) { 188 final PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + allowedTag); 189 190 if (matcher.matches(tag)) { 191 match = true; 192 break; 193 } 194 } 195 196 if (!match) { 197 throw new ProfileValidationException("Bag profile validation failure: tag " + tag + 198 " is not allowed. List of allowed tag files are " + 199 allowedTags); 200 } 201 } 202 } 203 204 /** 205 * Read an info file (bag-info.txt, aptrust-info.txt, etc) 206 * 207 * @param info the {@link Path} to the info file to read 208 * @return a mapping of keys to values read from the info file 209 * @throws IOException if a file cannot be read 210 */ 211 private static Map<String, String> readInfo(final Path info) throws IOException { 212 logger.debug("Trying to read info file {}", info); 213 final Map<String, String> data = new HashMap<>(); 214 final AtomicReference<String> previousKey = new AtomicReference<>(""); 215 216 // if a line starts indented, it is part of the previous key so we track what key we're working on 217 try (Stream<String> lines = Files.lines(info)) { 218 lines.forEach(line -> { 219 if (line.matches("^\\s+")) { 220 data.merge(previousKey.get(), line, String::concat); 221 } else { 222 final String[] split = line.split(":"); 223 final String key = split[0].trim(); 224 final String value = split[1].trim(); 225 previousKey.set(key); 226 data.put(key, value); 227 } 228 }); 229 } 230 231 return data; 232 } 233 234}