001/* 002 * The contents of this file are subject to the license and copyright detailed 003 * in the LICENSE and NOTICE files at the root of the source tree. 004 */ 005package org.duraspace.bagit; 006 007import java.io.File; 008import java.io.IOException; 009import java.io.OutputStream; 010import java.nio.file.Files; 011import java.nio.file.Path; 012import java.security.DigestOutputStream; 013import java.security.MessageDigest; 014import java.util.HashMap; 015import java.util.Map; 016import java.util.Set; 017import java.util.TreeMap; 018 019/** 020 * Utility to write BagIt bags. 021 * 022 * @author escowles 023 * @since 2016-12-15 024 */ 025public class BagWriter { 026 027 private File bagDir; 028 private File dataDir; 029 private Set<BagItDigest> algorithms; 030 031 private Map<BagItDigest, Map<File, String>> payloadRegistry; 032 private Map<BagItDigest, Map<File, String>> tagFileRegistry; 033 private Map<String, Map<String, String>> tagRegistry; 034 035 /** 036 * This map provides a way to retrieve all ongoing MessageDigests so that multiple checksums 037 * can easily be run and retrieved 038 */ 039 private Map<BagItDigest, DigestOutputStream> activeStreams; 040 041 /** 042 * Version of the BagIt specification implemented 043 */ 044 public static String BAGIT_VERSION = "1.0"; 045 046 /** 047 * Create a new, empty Bag 048 * @param bagDir The base directory for the Bag (will be created if it doesn't exist) 049 * @param algorithms Set of digest algorithms to use for manifests (e.g., "md5", "sha1", or "sha256") 050 */ 051 public BagWriter(final File bagDir, final Set<BagItDigest> algorithms) { 052 this.bagDir = bagDir; 053 this.dataDir = new File(bagDir, "data"); 054 if (!dataDir.exists()) { 055 dataDir.mkdirs(); 056 } 057 058 this.algorithms = algorithms; 059 payloadRegistry = new HashMap<>(); 060 tagFileRegistry = new HashMap<>(); 061 tagRegistry = new HashMap<>(); 062 063 final Map<String, String> bagitValues = new TreeMap<>(); 064 bagitValues.put("BagIt-Version", BAGIT_VERSION); 065 bagitValues.put("Tag-File-Character-Encoding", "UTF-8"); 066 tagRegistry.put("bagit.txt", bagitValues); 067 068 activeStreams = new HashMap<>(); 069 } 070 071 /** 072 * Get the Bag's root directory 073 * @return File object for the directory 074 */ 075 public File getRootDir() { 076 return bagDir; 077 } 078 079 /** 080 * Register checksums of payload (data) files 081 * @param algorithm Checksum digest algorithm name (e.g., "SHA-1") 082 * @param filemap Map of Files to checksum values 083 */ 084 public void registerChecksums(final BagItDigest algorithm, final Map<File, String> filemap) { 085 if (!algorithms.contains(algorithm)) { 086 throw new RuntimeException("Invalid algorithm: " + algorithm); 087 } 088 payloadRegistry.put(algorithm, filemap); 089 } 090 091 /** 092 * Add tags (metadata) to the Bag. If the {@code key} already exists, the {@code values} will be appended to the 093 * existing entry. 094 * 095 * @param key Filename of the tag file (e.g., "bag-info.txt") 096 * @param values Map containing field/value pairs 097 */ 098 public void addTags(final String key, final Map<String, String> values) { 099 final Map<String, String> tagValues = tagRegistry.computeIfAbsent(key, k -> new HashMap<>()); 100 tagValues.putAll(values); 101 } 102 103 /** 104 * Get the current tag (metadata) of the Bag 105 * @param key Filename of the tag file (e.g., "bag-info.txt") 106 * @return Map of field/value pairs 107 */ 108 public Map<String, String> getTags(final String key) { 109 return tagRegistry.get(key); 110 } 111 112 /** 113 * Write metadata and finalize Bag 114 * @throws IOException when an I/O error occurs 115 */ 116 public void write() throws IOException { 117 writeManifests("manifest", payloadRegistry, true); 118 for (String tagFile : tagRegistry.keySet()) { 119 writeTagFile(tagFile); 120 } 121 writeManifests("tagmanifest", tagFileRegistry, false); 122 } 123 124 private void writeManifests(final String prefix, final Map<BagItDigest, Map<File, String>> registry, 125 final boolean registerToTags) throws IOException { 126 final String delimiter = " "; 127 final char backslash = '\\'; 128 final char bagitSeparator = '/'; 129 final Path bag = bagDir.toPath(); 130 131 for (final BagItDigest algorithm : algorithms) { 132 final Map<File, String> filemap = registry.get(algorithm); 133 if (filemap != null) { 134 final File f = new File(bagDir, prefix + "-" + algorithm.bagitName() + ".txt"); 135 try (OutputStream out = streamFor(f.toPath())) { 136 for (final File payload : filemap.keySet()) { 137 // replace all occurrences of backslashes, which are not allowed per the bagit spec 138 final String relative = bag.relativize(payload.toPath()).toString() 139 .replace(backslash, bagitSeparator); 140 final String line = filemap.get(payload) + delimiter + relative; 141 out.write(line.getBytes()); 142 out.write("\n".getBytes()); 143 144 if (registerToTags) { 145 for (Map.Entry<BagItDigest, DigestOutputStream> entry : activeStreams.entrySet()) { 146 addTagChecksum(entry.getKey(), f, entry.getValue().getMessageDigest()); 147 } 148 } 149 activeStreams.clear(); 150 } 151 } 152 } 153 } 154 } 155 156 private void writeTagFile(final String key) throws IOException { 157 final Map<String, String> values = tagRegistry.get(key); 158 if (values != null) { 159 final File f = new File(bagDir, key); 160 161 try (OutputStream out = streamFor(f.toPath())) { 162 for (final String field : values.keySet()) { 163 final byte[] bytes = (field + ": " + values.get(field) + "\n").getBytes(); 164 out.write(bytes); 165 } 166 } 167 168 for (Map.Entry<BagItDigest, DigestOutputStream> entry : activeStreams.entrySet()) { 169 addTagChecksum(entry.getKey(), f, entry.getValue().getMessageDigest()); 170 } 171 } 172 173 activeStreams.clear(); 174 } 175 176 /** 177 * Create an {@link OutputStream} for a given {@link Path} which can be used to write data to the file. 178 * This wraps the returned {@link OutputStream} with {@link DigestOutputStream}s in order to create a checksum 179 * for the file as it is being written. There is one {@link DigestOutputStream} per {@link BagItDigest} in this 180 * classes registered {@code algorithms}. Each {@link DigestOutputStream} is stored in the {@code activeStreams} so 181 * that it can be retrieved later on. 182 * 183 * @param file the {@link Path} to create an {@link OutputStream} for 184 * @return the {@link OutputStream} 185 * @throws IOException if there is an error creating the {@link OutputStream} 186 */ 187 private OutputStream streamFor(final Path file) throws IOException { 188 OutputStream lastStream = Files.newOutputStream(file); 189 for (BagItDigest algorithm : algorithms) { 190 final DigestOutputStream dos = new DigestOutputStream(lastStream, algorithm.messageDigest()); 191 activeStreams.put(algorithm, dos); 192 lastStream = dos; 193 } 194 195 return lastStream; 196 } 197 198 private void addTagChecksum(final BagItDigest algorithm, final File f, final MessageDigest digest) { 199 if (digest != null) { 200 final Map<File, String> m = tagFileRegistry.computeIfAbsent(algorithm, key -> new HashMap<>()); 201 m.put(f, HexEncoder.toString(digest.digest())); 202 } 203 } 204}