/*
 * The contents of this file are subject to the license and copyright detailed
 * in the LICENSE and NOTICE files at the root of the source tree.
 */
package org.duraspace.bagit;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
 * Serialize a BagIt bag to be a tar+gzip archive.
 *
 * @author mikejritter
 * @since 2020-02-24
 */
public class TarGzBagSerializer implements BagSerializer {
    private final String extension = ".tar.gz";

    /**
     * Write the directory tree under {@code root} to a gzip-compressed tar archive.
     *
     * The archive is created next to {@code root} and is named after the final path
     * element of {@code root} with {@code .tar.gz} appended. Entry names are relative
     * to the parent of {@code root}, so every entry starts with the bag directory name.
     *
     * @param root the top-level directory of the bag; may be relative or absolute
     * @return the absolute path of the archive which was written
     * @throws IOException if the bag cannot be read or the archive cannot be written
     * @throws IllegalArgumentException if {@code root} has no parent directory
     */
    @Override
    public Path serialize(final Path root) throws IOException {
        // Resolve against the working directory up front: a relative root may have no
        // parent at all, and relativize() refuses to mix absolute and relative paths.
        final Path bagRoot = root.toAbsolutePath().normalize();
        final Path parent = bagRoot.getParent();
        if (parent == null) {
            throw new IllegalArgumentException("Unable to serialize " + root + ": it has no parent directory");
        }
        final String bagName = bagRoot.getFileName().toString();

        // Walk the bag before creating the archive so that an unreadable or missing
        // root does not leave an empty archive behind. The stream holds open directory
        // handles and must be closed.
        final List<Path> files;
        try (Stream<Path> walk = Files.walk(bagRoot)) {
            files = walk.collect(Collectors.toList());
        }

        final Path serializedBag = parent.resolve(bagName + extension);
        try (final OutputStream os = Files.newOutputStream(serializedBag);
             final GZIPOutputStream gzip = new GZIPOutputStream(os);
             final TarArchiveOutputStream tar = new TarArchiveOutputStream(gzip)) {
            // The default mode rejects entry names longer than 100 bytes; use PAX
            // headers so deeply nested payload files can be archived.
            tar.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);

            for (final Path bagEntry : files) {
                final String name = parent.relativize(bagEntry).toString();
                final ArchiveEntry entry = tar.createArchiveEntry(bagEntry.toFile(), name);
                tar.putArchiveEntry(entry);
                // Directories get a header only; regular files also get their content.
                if (Files.isRegularFile(bagEntry)) {
                    try (InputStream inputStream = Files.newInputStream(bagEntry)) {
                        IOUtils.copy(inputStream, tar);
                    }
                }
                tar.closeArchiveEntry();
            }
        }

        return serializedBag;
    }
}