001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
007
008import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
009import static org.assertj.core.api.Assertions.assertThat;
010import static org.junit.Assert.fail;
011
012import java.io.File;
013import java.io.IOException;
014import java.net.URISyntaxException;
015import java.net.URL;
016import java.nio.file.Files;
017import java.nio.file.Path;
018import java.nio.file.Paths;
019import java.security.MessageDigest;
020import java.time.LocalDate;
021import java.util.Collections;
022import java.util.HashMap;
023import java.util.List;
024import java.util.Map;
025import java.util.Objects;
026import java.util.stream.Stream;
027
028import gov.loc.repository.bagit.domain.Bag;
029import gov.loc.repository.bagit.exceptions.CorruptChecksumException;
030import gov.loc.repository.bagit.exceptions.FileNotInPayloadDirectoryException;
031import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
032import gov.loc.repository.bagit.exceptions.MaliciousPathException;
033import gov.loc.repository.bagit.exceptions.MissingBagitFileException;
034import gov.loc.repository.bagit.exceptions.MissingPayloadDirectoryException;
035import gov.loc.repository.bagit.exceptions.MissingPayloadManifestException;
036import gov.loc.repository.bagit.exceptions.UnparsableVersionException;
037import gov.loc.repository.bagit.exceptions.UnsupportedAlgorithmException;
038import gov.loc.repository.bagit.exceptions.VerificationException;
039import gov.loc.repository.bagit.reader.BagReader;
040import gov.loc.repository.bagit.verify.BagVerifier;
041import org.apache.commons.compress.utils.Sets;
042import org.apache.commons.io.FileUtils;
043import org.assertj.core.util.Maps;
044import org.junit.After;
045import org.junit.Before;
046import org.junit.Test;
047
048/**
049 * Test basic bag writing functionality to make sure we are writing compliant bags
050 *
051 * @author mikejritter
052 * @since 2020-03-05
053 */
054public class BagWriterTest {
055
056    // set up expected bag, data file, and tag files
057    private final String bagName = "bag-writer-test";
058    private final String filename = "hello-writer";
059    private final String extraTagName = "extra-tag.txt";
060
061    private Path bag;
062    private BagProfile profile;
063
064    @Before
065    public void setup() throws URISyntaxException, IOException {
066        final URL sampleUrl = this.getClass().getClassLoader().getResource("sample");
067        final Path sample = Paths.get(Objects.requireNonNull(sampleUrl).toURI());
068        bag = sample.resolve(bagName);
069
070        profile = new BagProfile(BagProfile.BuiltIn.BEYOND_THE_REPOSITORY);
071    }
072
073    @After
074    public void teardown() {
075        if (bag != null) {
076            FileUtils.deleteQuietly(bag.toFile());
077        }
078    }
079
080    @Test
081    public void write() throws IOException {
082        // The message digests to use
083        final BagItDigest sha1 = BagItDigest.SHA1;
084        final BagItDigest sha256 = BagItDigest.SHA256;
085        final BagItDigest sha512 = BagItDigest.SHA512;
086        final MessageDigest sha1MD = sha1.messageDigest();
087        final MessageDigest sha256MD = sha256.messageDigest();
088        final MessageDigest sha512MD = sha512.messageDigest();
089
090        // Create a writer with 3 manifest algorithms
091        Files.createDirectories(bag);
092        final BagWriter writer = new BagWriter(bag.toFile(), Sets.newHashSet(sha1, sha256, sha512));
093
094        // Setup the data file
095        final Path data = bag.resolve("data");
096        final Path file = Files.createFile(data.resolve(filename));
097        final Map<File, String> sha1Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha1MD.digest()));
098        final Map<File, String> sha256Sums  = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha256MD.digest()));
099        final Map<File, String> sha512Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha512MD.digest()));
100
101        writer.addTags(extraTagName, Maps.newHashMap("test-key", "test-value"));
102        writer.addTags(extraTagName, Maps.newHashMap("additional-key", "additional-value"));
103        final Map<String, String> bagInfoFields = new HashMap<>();
104        bagInfoFields.put(BagConfig.SOURCE_ORGANIZATION_KEY, "bagit-support");
105        bagInfoFields.put(BagConfig.BAGGING_DATE_KEY, ISO_LOCAL_DATE.format(LocalDate.now()));
106        bagInfoFields.put(BagConfig.BAG_SIZE_KEY, "0 bytes");
107        bagInfoFields.put(BagConfig.PAYLOAD_OXUM_KEY, "1.0");
108        writer.addTags(BagConfig.BAG_INFO_KEY, bagInfoFields);
109        writer.registerChecksums(sha1, sha1Sums);
110        writer.registerChecksums(sha256, sha256Sums);
111        writer.registerChecksums(sha512, sha512Sums);
112
113        writer.write();
114
115        final Path bagit = bag.resolve("bagit.txt");
116        final Path extra = bag.resolve(extraTagName);
117        final Path bagInfo = bag.resolve(BagConfig.BAG_INFO_KEY);
118        final Path sha1Manifest = bag.resolve("manifest-" + sha1.bagitName() + ".txt");
119        final Path sha1Tagmanifest = bag.resolve("tagmanifest-" + sha1.bagitName() + ".txt");
120        final Path sha256Manifest = bag.resolve("manifest-" + sha256.bagitName() + ".txt");
121        final Path sha256Tagmanifest = bag.resolve("tagmanifest-" + sha256.bagitName() + ".txt");
122        final Path sha512Manifest = bag.resolve("manifest-" + sha512.bagitName() + ".txt");
123        final Path sha512Tagmanifest = bag.resolve("tagmanifest-" + sha512.bagitName() + ".txt");
124
125        // Assert that all tag files (bagit.txt, bag-info.txt, etc) exist
126        assertThat(bagit).exists();
127        assertThat(extra).exists();
128        assertThat(bagInfo).exists();
129        assertThat(sha1Manifest).exists();
130        assertThat(sha1Tagmanifest).exists();
131        assertThat(sha256Manifest).exists();
132        assertThat(sha256Tagmanifest).exists();
133        assertThat(sha512Manifest).exists();
134        assertThat(sha512Tagmanifest).exists();
135
136        // Assert that bagit.txt contains expected lines
137        final List<String> bagitLines = Files.readAllLines(bagit);
138        assertThat(bagitLines).containsSequence("BagIt-Version: 1.0", "Tag-File-Character-Encoding: UTF-8");
139
140        // Assert that bag-info.txt contains... the bare necessities
141        final List<String> bagInfoLines = Files.readAllLines(bagInfo);
142        assertThat(bagInfoLines).contains(BagConfig.SOURCE_ORGANIZATION_KEY + ": bagit-support");
143
144        // Assert that extra-tag.txt exists
145        final List<String> extraLines = Files.readAllLines(extra);
146        assertThat(extraLines)
147            .hasSize(2)
148            .contains("test-key: test-value", "additional-key: additional-value");
149
150        // Assert that tagmanifest-{sha1,sha256,sha512}.txt contain the manifest checksums
151        final String manifestRegex = sha1.bagitName() + "|" + sha256.bagitName() + "|" + sha512.bagitName();
152        for (Path tagmanifest : Sets.newHashSet(sha1Tagmanifest, sha256Tagmanifest, sha512Tagmanifest)) {
153            try (Stream<String> lines = Files.lines(tagmanifest)) {
154                assertThat(lines)
155                    .filteredOn(line -> line.contains("manifest"))
156                    .hasSize(3)
157                    .allSatisfy(entry -> assertThat(entry).containsPattern(manifestRegex));
158            }
159        }
160
161        // Finally, pass BagProfile validation and BagIt validation
162        final BagReader reader = new BagReader();
163        final BagVerifier verifier = new BagVerifier();
164        try {
165            final Bag readBag = reader.read(bag);
166            profile.validateBag(readBag);
167            verifier.isValid(readBag, false);
168        } catch (UnparsableVersionException | MaliciousPathException | UnsupportedAlgorithmException |
169            InvalidBagitFileFormatException e) {
170            fail("Unable to read bag:\n" + e.getMessage());
171        } catch (VerificationException | MissingPayloadDirectoryException | MissingPayloadManifestException |
172            FileNotInPayloadDirectoryException | CorruptChecksumException | MissingBagitFileException |
173            InterruptedException e) {
174            fail("Unable to verify bag:\n" + e.getMessage());
175        }
176    }
177
178    @Test(expected = RuntimeException.class)
179    public void testAddInvalidAlgorithm() throws IOException {
180        // The message digests to use
181        final BagItDigest sha1 = BagItDigest.SHA1;
182        final BagItDigest sha256 = BagItDigest.SHA256;
183
184        // Create a writer with 3 manifest algorithms
185        Files.createDirectories(bag);
186        final BagWriter writer = new BagWriter(bag.toFile(), Sets.newHashSet(sha1));
187
188        // we don't need to pass any files, just the errant BagItDigest
189        writer.registerChecksums(sha256, Collections.emptyMap());
190    }
191
192}