/*
 * The contents of this file are subject to the license and copyright detailed
 * in the LICENSE and NOTICE files at the root of the source tree.
 */
package org.duraspace.bagit;

import static org.duraspace.bagit.BagItDigest.MD5;
import static org.duraspace.bagit.BagItDigest.SHA1;
import static org.duraspace.bagit.BagItDigest.SHA256;
import static org.duraspace.bagit.BagItDigest.SHA512;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Utility to write BagIt bags.
 *
 * <p>Payload checksums are supplied by the caller via {@link #registerChecksums(String, Map)}; tag files are
 * generated from the field/value maps supplied via {@link #addTags(String, Map)}, and their checksums are
 * computed while they are being written. Nothing is written to the manifests or tag files until
 * {@link #write()} is called.
 *
 * @author escowles
 * @since 2016-12-15
 */
public class BagWriter {

    private final File bagDir;
    private final File dataDir;
    private final Set<String> algorithms;

    /** Payload checksums: algorithm name -> (payload file -> checksum). */
    private final Map<String, Map<File, String>> payloadRegistry;
    /** Tag file checksums: algorithm name -> (tag file -> checksum); filled in by {@link #write()}. */
    private final Map<String, Map<File, String>> tagFileRegistry;
    /** Tag file contents: tag file name -> (field -> value). */
    private final Map<String, Map<String, String>> tagRegistry;

    /**
     * Version of the BagIt specification implemented
     */
    public static String BAGIT_VERSION = "0.97";

    /**
     * Create a new, empty Bag
     * @param bagDir The base directory for the Bag (will be created if it doesn't exist)
     * @param algorithms Set of digest algorithms to use for manifests (e.g., "md5", "sha1", or "sha256")
     */
    public BagWriter(final File bagDir, final Set<String> algorithms) {
        this.bagDir = bagDir;
        this.dataDir = new File(bagDir, "data");
        if (!dataDir.exists()) {
            // also creates bagDir itself, since it is the parent of the data directory
            dataDir.mkdirs();
        }

        this.algorithms = algorithms;
        payloadRegistry = new HashMap<>();
        tagFileRegistry = new HashMap<>();
        tagRegistry = new HashMap<>();

        // every bag gets a bagit.txt declaration; the declared encoding is what write() actually uses
        final Map<String, String> bagitValues = new HashMap<>();
        bagitValues.put("BagIt-Version", BAGIT_VERSION);
        bagitValues.put("Tag-File-Character-Encoding", "UTF-8");
        tagRegistry.put("bagit.txt", bagitValues);
    }

    /**
     * Get the Bag's root directory
     * @return File object for the directory
     */
    public File getRootDir() {
        return bagDir;
    }

    /**
     * Register checksums of payload (data) files
     * @param algorithm Checksum digest algorithm name; must be one of the algorithms given to the constructor
     * @param filemap Map of Files to checksum values
     * @throws IllegalArgumentException if the algorithm is not one of this writer's configured algorithms
     */
    public void registerChecksums(final String algorithm, final Map<File, String> filemap) {
        if (!algorithms.contains(algorithm)) {
            throw new IllegalArgumentException("Invalid algorithm: " + algorithm);
        }
        payloadRegistry.put(algorithm, filemap);
    }

    /**
     * Add tags (metadata) to the Bag
     * @param key Filename of the tag file (e.g., "bag-info.txt")
     * @param values Map containing field/value pairs
     */
    public void addTags(final String key, final Map<String, String> values) {
        tagRegistry.put(key, values);
    }

    /**
     * Get the current tag (metadata) of the Bag
     * @param key Filename of the tag file (e.g., "bag-info.txt")
     * @return Map of field/value pairs, or null if no tags are registered under that filename
     */
    public Map<String, String> getTags(final String key) {
        return tagRegistry.get(key);
    }

    /**
     * Write metadata and finalize Bag
     * @throws IOException when an I/O error occurs
     */
    public void write() throws IOException {
        writeManifests("manifest", payloadRegistry);
        // tag files must be written before the tag manifests: writing them populates tagFileRegistry
        for (final String tagFile : tagRegistry.keySet()) {
            writeTagFile(tagFile);
        }
        writeManifests("tagmanifest", tagFileRegistry);
    }

    /**
     * Write one manifest file per configured algorithm that has entries in the given registry.
     * Each line is the checksum, a delimiter, and the file's path relative to the bag root.
     *
     * @param prefix manifest filename prefix ("manifest" or "tagmanifest")
     * @param registry checksums keyed by algorithm name
     * @throws IOException when a manifest cannot be written
     */
    private void writeManifests(final String prefix, final Map<String, Map<File, String>> registry)
            throws IOException {
        final String delimiter = " ";
        final char backslash = '\\';
        final char bagitSeparator = '/';
        final Path bag = bagDir.toPath();

        for (final String algorithm : algorithms) {
            final Map<File, String> filemap = registry.get(algorithm);
            if (filemap == null) {
                continue;
            }

            final File manifest = new File(bagDir, prefix + "-" + algorithm + ".txt");
            // Explicit UTF-8 rather than the platform default, and a writer that propagates
            // IOExceptions instead of silently recording them the way PrintWriter does.
            try (BufferedWriter out = Files.newBufferedWriter(manifest.toPath(), StandardCharsets.UTF_8)) {
                for (final Map.Entry<File, String> entry : filemap.entrySet()) {
                    // replace all occurrences of backslashes, which are not allowed per the bagit spec
                    final String relative = bag.relativize(entry.getKey().toPath()).toString()
                            .replace(backslash, bagitSeparator);
                    out.write(entry.getValue() + delimiter + relative);
                    out.newLine();
                }
            }
        }
    }

    /**
     * Write a single tag file as "field: value" lines and record its checksum for every configured
     * algorithm so that it can be listed in the tag manifests.
     *
     * @param key filename of the tag file, relative to the bag root
     * @throws IOException when the tag file cannot be written
     */
    private void writeTagFile(final String key) throws IOException {
        final Map<String, String> values = tagRegistry.get(key);
        if (values == null) {
            return;
        }

        final File f = new File(bagDir, key);

        // a digest stays null when its algorithm was not requested for this bag
        final MessageDigest md5 = algorithms.contains(MD5.bagitName()) ? MD5.messageDigest() : null;
        final MessageDigest sha1 = algorithms.contains(SHA1.bagitName()) ? SHA1.messageDigest() : null;
        final MessageDigest sha256 = algorithms.contains(SHA256.bagitName()) ? SHA256.messageDigest() : null;
        final MessageDigest sha512 = algorithms.contains(SHA512.bagitName()) ? SHA512.messageDigest() : null;

        try (OutputStream out = new FileOutputStream(f)) {
            for (final Map.Entry<String, String> tag : values.entrySet()) {
                // Encode explicitly as UTF-8 to match the Tag-File-Character-Encoding declared in
                // bagit.txt; the same bytes feed the digests so checksums match the file on disk.
                final byte[] bytes = (tag.getKey() + ": " + tag.getValue() + "\n")
                        .getBytes(StandardCharsets.UTF_8);
                out.write(bytes);

                if (md5 != null) {
                    md5.update(bytes);
                }
                if (sha1 != null) {
                    sha1.update(bytes);
                }
                if (sha256 != null) {
                    sha256.update(bytes);
                }
                if (sha512 != null) {
                    sha512.update(bytes);
                }
            }
        }

        addTagChecksum(MD5.bagitName(), f, md5);
        addTagChecksum(SHA1.bagitName(), f, sha1);
        addTagChecksum(SHA256.bagitName(), f, sha256);
        addTagChecksum(SHA512.bagitName(), f, sha512);
    }

    /**
     * Record the hex-encoded checksum of a tag file under the given algorithm.
     *
     * @param algorithm algorithm name used as the registry key
     * @param f the tag file the digest was computed for
     * @param digest the completed digest, or null if the algorithm is not in use (nothing is recorded)
     */
    private void addTagChecksum(final String algorithm, final File f, final MessageDigest digest) {
        if (digest != null) {
            final Map<File, String> m = tagFileRegistry.computeIfAbsent(algorithm, key -> new HashMap<>());
            m.put(f, HexEncoder.toString(digest.digest()));
        }
    }
}