001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
007
008import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
009import static org.assertj.core.api.Assertions.assertThat;
010import static org.junit.Assert.fail;
011
012import java.io.File;
013import java.io.IOException;
014import java.net.URISyntaxException;
015import java.net.URL;
016import java.nio.file.Files;
017import java.nio.file.Path;
018import java.nio.file.Paths;
019import java.security.MessageDigest;
020import java.time.LocalDate;
021import java.util.Collections;
022import java.util.HashMap;
023import java.util.List;
024import java.util.Map;
025import java.util.Objects;
026import java.util.stream.Stream;
027
028import gov.loc.repository.bagit.domain.Bag;
029import gov.loc.repository.bagit.exceptions.CorruptChecksumException;
030import gov.loc.repository.bagit.exceptions.FileNotInPayloadDirectoryException;
031import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
032import gov.loc.repository.bagit.exceptions.MaliciousPathException;
033import gov.loc.repository.bagit.exceptions.MissingBagitFileException;
034import gov.loc.repository.bagit.exceptions.MissingPayloadDirectoryException;
035import gov.loc.repository.bagit.exceptions.MissingPayloadManifestException;
036import gov.loc.repository.bagit.exceptions.UnparsableVersionException;
037import gov.loc.repository.bagit.exceptions.UnsupportedAlgorithmException;
038import gov.loc.repository.bagit.exceptions.VerificationException;
039import gov.loc.repository.bagit.reader.BagReader;
040import gov.loc.repository.bagit.verify.BagVerifier;
041import org.apache.commons.compress.utils.Sets;
042import org.apache.commons.io.FileUtils;
043import org.assertj.core.util.Maps;
044import org.junit.After;
045import org.junit.Before;
046import org.junit.Test;
047
048/**
049 * Test basic bag writing functionality to make sure we are writing compliant bags
050 *
051 * @author mikejritter
052 * @since 2020-03-05
053 */
054public class BagWriterTest {
055
056    // set up expected bag, data file, and tag files
057    private final String bagName = "bag-writer-test";
058    private final String filename = "hello-writer";
059    private final String extraTagName = "extra-tag.txt";
060
061    private Path bag;
062    private BagProfile profile;
063
064    @Before
065    public void setup() throws URISyntaxException, IOException {
066        final URL sampleUrl = this.getClass().getClassLoader().getResource("sample");
067        final Path sample = Paths.get(Objects.requireNonNull(sampleUrl).toURI());
068        bag = sample.resolve(bagName);
069
070        profile = new BagProfile(BagProfile.BuiltIn.BEYOND_THE_REPOSITORY);
071    }
072
073    @After
074    public void teardown() {
075        if (bag != null) {
076            FileUtils.deleteQuietly(bag.toFile());
077        }
078    }
079
080    @Test
081    public void write() throws IOException {
082        // The message digests to use
083        final BagItDigest sha1 = BagItDigest.SHA1;
084        final BagItDigest sha256 = BagItDigest.SHA256;
085        final BagItDigest sha512 = BagItDigest.SHA512;
086        final MessageDigest sha1MD = sha1.messageDigest();
087        final MessageDigest sha256MD = sha256.messageDigest();
088        final MessageDigest sha512MD = sha512.messageDigest();
089
090        // Create a writer with 3 manifest algorithms
091        Files.createDirectories(bag);
092        final BagWriter writer = new BagWriter(bag.toFile(), Sets.newHashSet(sha1, sha256, sha512));
093
094        // Setup the data files
095        final Path data = bag.resolve("data");
096        final Path file = Files.createFile(data.resolve(filename));
097        final Map<File, String> sha1Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha1MD.digest()));
098        final Map<File, String> sha256Sums  = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha256MD.digest()));
099        final Map<File, String> sha512Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha512MD.digest()));
100
101        // second file
102        final Path file2 = Files.createFile(data.resolve(filename + "2"));
103        sha1Sums.put(file2.toFile(), HexEncoder.toString(sha1MD.digest()));
104        sha256Sums.put(file2.toFile(), HexEncoder.toString(sha256MD.digest()));
105        sha512Sums.put(file2.toFile(), HexEncoder.toString(sha512MD.digest()));
106
107        writer.addTags(extraTagName, Maps.newHashMap("test-key", "test-value"));
108        writer.addTags(extraTagName, Maps.newHashMap("additional-key", "additional-value"));
109        final Map<String, String> bagInfoFields = new HashMap<>();
110        bagInfoFields.put(BagConfig.SOURCE_ORGANIZATION_KEY, "bagit-support");
111        bagInfoFields.put(BagConfig.BAGGING_DATE_KEY, ISO_LOCAL_DATE.format(LocalDate.now()));
112        bagInfoFields.put(BagConfig.BAG_SIZE_KEY, "0 bytes");
113        bagInfoFields.put(BagConfig.PAYLOAD_OXUM_KEY, "1.0");
114        writer.addTags(BagConfig.BAG_INFO_KEY, bagInfoFields);
115        writer.registerChecksums(sha1, sha1Sums);
116        writer.registerChecksums(sha256, sha256Sums);
117        writer.registerChecksums(sha512, sha512Sums);
118
119        writer.write();
120
121        final Path bagit = bag.resolve("bagit.txt");
122        final Path extra = bag.resolve(extraTagName);
123        final Path bagInfo = bag.resolve(BagConfig.BAG_INFO_KEY);
124        final Path sha1Manifest = bag.resolve("manifest-" + sha1.bagitName() + ".txt");
125        final Path sha1Tagmanifest = bag.resolve("tagmanifest-" + sha1.bagitName() + ".txt");
126        final Path sha256Manifest = bag.resolve("manifest-" + sha256.bagitName() + ".txt");
127        final Path sha256Tagmanifest = bag.resolve("tagmanifest-" + sha256.bagitName() + ".txt");
128        final Path sha512Manifest = bag.resolve("manifest-" + sha512.bagitName() + ".txt");
129        final Path sha512Tagmanifest = bag.resolve("tagmanifest-" + sha512.bagitName() + ".txt");
130
131        // Assert that all tag files (bagit.txt, bag-info.txt, etc) exist
132        assertThat(bagit).exists();
133        assertThat(extra).exists();
134        assertThat(bagInfo).exists();
135        assertThat(sha1Manifest).exists();
136        assertThat(sha1Tagmanifest).exists();
137        assertThat(sha256Manifest).exists();
138        assertThat(sha256Tagmanifest).exists();
139        assertThat(sha512Manifest).exists();
140        assertThat(sha512Tagmanifest).exists();
141
142        // Assert that bagit.txt contains expected lines
143        final List<String> bagitLines = Files.readAllLines(bagit);
144        assertThat(bagitLines).containsSequence("BagIt-Version: 1.0", "Tag-File-Character-Encoding: UTF-8");
145
146        // Assert that bag-info.txt contains... the bare necessities
147        final List<String> bagInfoLines = Files.readAllLines(bagInfo);
148        assertThat(bagInfoLines).contains(BagConfig.SOURCE_ORGANIZATION_KEY + ": bagit-support");
149
150        // Assert that extra-tag.txt exists
151        final List<String> extraLines = Files.readAllLines(extra);
152        assertThat(extraLines)
153            .hasSize(2)
154            .contains("test-key: test-value", "additional-key: additional-value");
155
156        // Assert that tagmanifest-{sha1,sha256,sha512}.txt contain the manifest checksums
157        final String manifestRegex = sha1.bagitName() + "|" + sha256.bagitName() + "|" + sha512.bagitName();
158        for (Path tagmanifest : Sets.newHashSet(sha1Tagmanifest, sha256Tagmanifest, sha512Tagmanifest)) {
159            try (Stream<String> lines = Files.lines(tagmanifest)) {
160                assertThat(lines)
161                    .filteredOn(line -> line.contains("manifest"))
162                    .hasSize(3)
163                    .allSatisfy(entry -> assertThat(entry).containsPattern(manifestRegex));
164            }
165        }
166
167        // Finally, pass BagProfile validation and BagIt validation
168        final BagReader reader = new BagReader();
169        final BagVerifier verifier = new BagVerifier();
170        try {
171            final Bag readBag = reader.read(bag);
172            profile.validateBag(readBag);
173            verifier.isValid(readBag, false);
174        } catch (UnparsableVersionException | MaliciousPathException | UnsupportedAlgorithmException |
175            InvalidBagitFileFormatException e) {
176            fail("Unable to read bag:\n" + e.getMessage());
177        } catch (VerificationException | MissingPayloadDirectoryException | MissingPayloadManifestException |
178            FileNotInPayloadDirectoryException | CorruptChecksumException | MissingBagitFileException |
179            InterruptedException e) {
180            fail("Unable to verify bag:\n" + e.getMessage());
181        }
182    }
183
184    @Test(expected = RuntimeException.class)
185    public void testAddInvalidAlgorithm() throws IOException {
186        // The message digests to use
187        final BagItDigest sha1 = BagItDigest.SHA1;
188        final BagItDigest sha256 = BagItDigest.SHA256;
189
190        // Create a writer with 3 manifest algorithms
191        Files.createDirectories(bag);
192        final BagWriter writer = new BagWriter(bag.toFile(), Sets.newHashSet(sha1));
193
194        // we don't need to pass any files, just the errant BagItDigest
195        writer.registerChecksums(sha256, Collections.emptyMap());
196    }
197
198}