001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
import static org.duraspace.bagit.BagItDigest.MD5;
import static org.duraspace.bagit.BagItDigest.SHA1;
import static org.duraspace.bagit.BagItDigest.SHA256;
import static org.duraspace.bagit.BagItDigest.SHA512;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
024
025/**
026 * Utility to write BagIt bags.
027 *
028 * @author escowles
029 * @since 2016-12-15
030 */
031public class BagWriter {
032
033    private File bagDir;
034    private File dataDir;
035    private Set<String> algorithms;
036
037    private Map<String, Map<File, String>> payloadRegistry;
038    private Map<String, Map<File, String>> tagFileRegistry;
039    private Map<String, Map<String, String>> tagRegistry;
040
041    /**
042     * Version of the BagIt specification implemented
043     */
044    public static String BAGIT_VERSION = "0.97";
045
046    /**
047     * Create a new, empty Bag
048     * @param bagDir The base directory for the Bag (will be created if it doesn't exist)
049     * @param algorithms Set of digest algorithms to use for manifests (e.g., "md5", "sha1", or "sha256")
050     */
051    public BagWriter(final File bagDir, final Set<String> algorithms) {
052        this.bagDir = bagDir;
053        this.dataDir = new File(bagDir, "data");
054        if (!dataDir.exists()) {
055            dataDir.mkdirs();
056        }
057
058        this.algorithms = algorithms;
059        payloadRegistry = new HashMap<>();
060        tagFileRegistry = new HashMap<>();
061        tagRegistry = new HashMap<>();
062
063        final Map<String, String> bagitValues = new HashMap<>();
064        bagitValues.put("BagIt-Version", BAGIT_VERSION);
065        bagitValues.put("Tag-File-Character-Encoding", "UTF-8");
066        tagRegistry.put("bagit.txt", bagitValues);
067    }
068
069    /**
070     * Get the Bag's root directory
071     * @return File object for the directory
072     */
073    public File getRootDir() {
074        return bagDir;
075    }
076
077    /**
078     * Register checksums of payload (data) files
079     * @param algorithm Checksum digest algorithm name (e.g., "SHA-1")
080     * @param filemap Map of Files to checksum values
081     */
082    public void registerChecksums(final String algorithm, final Map<File, String> filemap) {
083        if (!algorithms.contains(algorithm)) {
084            throw new RuntimeException("Invalid algorithm: " + algorithm);
085        }
086        payloadRegistry.put(algorithm, filemap);
087    }
088
089    /**
090     * Add tags (metadata) to the Bag
091     * @param key Filename of the tag file (e.g., "bag-info.txt")
092     * @param values Map containing field/value pairs
093     */
094    public void addTags(final String key, final Map<String, String> values) {
095        tagRegistry.put(key, values);
096    }
097
098    /**
099     * Get the current tag (metadata) of the Bag
100     * @param key Filename of the tag file (e.g., "bag-info.txt")
101     * @return Map of field/value pairs
102     */
103    public Map<String, String> getTags(final String key) {
104        return tagRegistry.get(key);
105    }
106
107    /**
108     * Write metadata and finalize Bag
109     * @throws IOException when an I/O error occurs
110     */
111    public void write() throws IOException {
112        writeManifests("manifest", payloadRegistry);
113        for (String tagFile : tagRegistry.keySet()) {
114            writeTagFile(tagFile);
115        }
116        writeManifests("tagmanifest", tagFileRegistry);
117    }
118
119    private void writeManifests(final String prefix, final Map<String, Map<File, String>> registry)
120            throws IOException {
121        final String delimiter = "  ";
122        final char backslash = '\\';
123        final char bagitSeparator = '/';
124        final Path bag = bagDir.toPath();
125
126        for (final String algorithm : algorithms) {
127            final Map<File, String> filemap = registry.get(algorithm);
128            if (filemap != null) {
129                final File f = new File(bagDir, prefix + "-" + algorithm + ".txt");
130                try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(f)))) {
131                    for (final File payload : filemap.keySet()) {
132                        // replace all occurrences of backslashes, which are not allowed per the bagit spec
133                        final String relative = bag.relativize(payload.toPath()).toString()
134                                                   .replace(backslash, bagitSeparator);
135                        out.println(filemap.get(payload) + delimiter + relative);
136                    }
137                }
138            }
139        }
140    }
141
142    private void writeTagFile(final String key) throws IOException {
143        final Map<String, String> values = tagRegistry.get(key);
144        if (values != null) {
145            final File f = new File(bagDir, key);
146
147            MessageDigest md5 = null;
148            MessageDigest sha1 = null;
149            MessageDigest sha256 = null;
150            MessageDigest sha512 = null;
151            if (algorithms.contains(MD5.bagitName())) {
152                md5 = MD5.messageDigest();
153            }
154            if (algorithms.contains(SHA1.bagitName())) {
155                sha1 = SHA1.messageDigest();
156            }
157            if (algorithms.contains(SHA256.bagitName())) {
158                sha256 = SHA256.messageDigest();
159            }
160            if (algorithms.contains(SHA512.bagitName())) {
161                sha512 = SHA512.messageDigest();
162            }
163
164            try (OutputStream out = new FileOutputStream(f)) {
165                for (final String field : values.keySet()) {
166                    final byte[] bytes = (field + ": " + values.get(field) + "\n").getBytes();
167                    out.write(bytes);
168
169                    if (md5 != null) {
170                        md5.update(bytes);
171                    }
172                    if (sha1 != null) {
173                        sha1.update(bytes);
174                    }
175                    if (sha256 != null) {
176                        sha256.update(bytes);
177                    }
178                    if (sha512 != null) {
179                        sha512.update(bytes);
180                    }
181                }
182            }
183
184            addTagChecksum(MD5.bagitName(), f, md5);
185            addTagChecksum(SHA1.bagitName(), f, sha1);
186            addTagChecksum(SHA256.bagitName(), f, sha256);
187            addTagChecksum(SHA512.bagitName(), f, sha512);
188        }
189    }
190
191    private void addTagChecksum(final String algorithm, final File f, final MessageDigest digest) {
192        if (digest != null) {
193            final Map<File, String> m = tagFileRegistry.computeIfAbsent(algorithm, key -> new HashMap<>());
194            m.put(f, HexEncoder.toString(digest.digest()));
195        }
196    }
197}