001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
018
019/**
020 * Utility to write BagIt bags.
021 *
022 * @author escowles
023 * @since 2016-12-15
024 */
025public class BagWriter {
026
027    private File bagDir;
028    private File dataDir;
029    private Set<BagItDigest> algorithms;
030
031    private Map<BagItDigest, Map<File, String>> payloadRegistry;
032    private Map<BagItDigest, Map<File, String>> tagFileRegistry;
033    private Map<String, Map<String, String>> tagRegistry;
034
035    /**
036     * This map provides a way to retrieve all ongoing MessageDigests so that multiple checksums
037     * can easily be run and retrieved
038     */
039    private Map<BagItDigest, DigestOutputStream> activeStreams;
040
041    /**
042     * Version of the BagIt specification implemented
043     */
044    public static String BAGIT_VERSION = "1.0";
045
046    /**
047     * Create a new, empty Bag
048     * @param bagDir The base directory for the Bag (will be created if it doesn't exist)
049     * @param algorithms Set of digest algorithms to use for manifests (e.g., "md5", "sha1", or "sha256")
050     */
051    public BagWriter(final File bagDir, final Set<BagItDigest> algorithms) {
052        this.bagDir = bagDir;
053        this.dataDir = new File(bagDir, "data");
054        if (!dataDir.exists()) {
055            dataDir.mkdirs();
056        }
057
058        this.algorithms = algorithms;
059        payloadRegistry = new HashMap<>();
060        tagFileRegistry = new HashMap<>();
061        tagRegistry = new HashMap<>();
062
063        final Map<String, String> bagitValues = new TreeMap<>();
064        bagitValues.put("BagIt-Version", BAGIT_VERSION);
065        bagitValues.put("Tag-File-Character-Encoding", "UTF-8");
066        tagRegistry.put("bagit.txt", bagitValues);
067
068        activeStreams = new HashMap<>();
069    }
070
071    /**
072     * Get the Bag's root directory
073     * @return File object for the directory
074     */
075    public File getRootDir() {
076        return bagDir;
077    }
078
079    /**
080     * Register checksums of payload (data) files
081     * @param algorithm Checksum digest algorithm name (e.g., "SHA-1")
082     * @param filemap Map of Files to checksum values
083     */
084    public void registerChecksums(final BagItDigest algorithm, final Map<File, String> filemap) {
085        if (!algorithms.contains(algorithm)) {
086            throw new RuntimeException("Invalid algorithm: " + algorithm);
087        }
088        payloadRegistry.put(algorithm, filemap);
089    }
090
091    /**
092     * Add tags (metadata) to the Bag. If the {@code key} already exists, the {@code values} will be appended to the
093     * existing entry.
094     *
095     * @param key Filename of the tag file (e.g., "bag-info.txt")
096     * @param values Map containing field/value pairs
097     */
098    public void addTags(final String key, final Map<String, String> values) {
099        final Map<String, String> tagValues = tagRegistry.computeIfAbsent(key, k -> new HashMap<>());
100        tagValues.putAll(values);
101    }
102
103    /**
104     * Get the current tag (metadata) of the Bag
105     * @param key Filename of the tag file (e.g., "bag-info.txt")
106     * @return Map of field/value pairs
107     */
108    public Map<String, String> getTags(final String key) {
109        return tagRegistry.get(key);
110    }
111
112    /**
113     * Write metadata and finalize Bag
114     * @throws IOException when an I/O error occurs
115     */
116    public void write() throws IOException {
117        writeManifests("manifest", payloadRegistry, true);
118        for (String tagFile : tagRegistry.keySet()) {
119            writeTagFile(tagFile);
120        }
121        writeManifests("tagmanifest", tagFileRegistry, false);
122    }
123
124    private void writeManifests(final String prefix, final Map<BagItDigest, Map<File, String>> registry,
125                                final boolean registerToTags) throws IOException {
126        final String delimiter = "  ";
127        final char backslash = '\\';
128        final char bagitSeparator = '/';
129        final Path bag = bagDir.toPath();
130
131        for (final BagItDigest algorithm : algorithms) {
132            final Map<File, String> filemap = registry.get(algorithm);
133            if (filemap != null) {
134                final File manifest = new File(bagDir, prefix + "-" + algorithm.bagitName() + ".txt");
135                try (OutputStream out = streamFor(manifest.toPath())) {
136                    for (final File payload : filemap.keySet()) {
137                        // replace all occurrences of backslashes, which are not allowed per the bagit spec
138                        final String relative = bag.relativize(payload.toPath()).toString()
139                                                   .replace(backslash, bagitSeparator);
140                        final String line = filemap.get(payload) + delimiter + relative;
141                        out.write(line.getBytes());
142                        out.write("\n".getBytes());
143                    }
144                }
145
146                // now that the stream is finished being written to, register the checksum if required
147                if (registerToTags) {
148                    for (Map.Entry<BagItDigest, DigestOutputStream> entry : activeStreams.entrySet()) {
149                        addTagChecksum(entry.getKey(), manifest, entry.getValue().getMessageDigest());
150                    }
151                }
152                activeStreams.clear();
153            }
154        }
155    }
156
157    private void writeTagFile(final String key) throws IOException {
158        final Map<String, String> values = tagRegistry.get(key);
159        if (values != null) {
160            final File f = new File(bagDir, key);
161
162            try (OutputStream out = streamFor(f.toPath())) {
163                for (final String field : values.keySet()) {
164                    final byte[] bytes = (field + ": " + values.get(field) + "\n").getBytes();
165                    out.write(bytes);
166                }
167            }
168
169            for (Map.Entry<BagItDigest, DigestOutputStream> entry : activeStreams.entrySet()) {
170                addTagChecksum(entry.getKey(), f, entry.getValue().getMessageDigest());
171            }
172        }
173
174        activeStreams.clear();
175    }
176
177    /**
178     * Create an {@link OutputStream} for a given {@link Path} which can be used to write data to the file.
179     * This wraps the returned {@link OutputStream} with {@link DigestOutputStream}s in order to create a checksum
180     * for the file as it is being written. There is one {@link DigestOutputStream} per {@link BagItDigest} in this
181     * classes registered {@code algorithms}. Each {@link DigestOutputStream} is stored in the {@code activeStreams} so
182     * that it can be retrieved later on.
183     *
184     * @param file the {@link Path} to create an {@link OutputStream} for
185     * @return the {@link OutputStream}
186     * @throws IOException if there is an error creating the {@link OutputStream}
187     */
188    private OutputStream streamFor(final Path file) throws IOException {
189        OutputStream lastStream = Files.newOutputStream(file);
190        for (BagItDigest algorithm : algorithms) {
191            final DigestOutputStream dos = new DigestOutputStream(lastStream, algorithm.messageDigest());
192            activeStreams.put(algorithm, dos);
193            lastStream = dos;
194        }
195
196        return lastStream;
197    }
198
199    private void addTagChecksum(final BagItDigest algorithm, final File f, final MessageDigest digest) {
200        if (digest != null) {
201            final Map<File, String> m = tagFileRegistry.computeIfAbsent(algorithm, key -> new HashMap<>());
202            m.put(f, HexEncoder.toString(digest.digest()));
203        }
204    }
205}