001/* 002 * The contents of this file are subject to the license and copyright detailed 003 * in the LICENSE and NOTICE files at the root of the source tree. 004 */ 005package org.duraspace.bagit; 006 007import java.io.IOException; 008import java.io.InputStream; 009import java.io.OutputStream; 010import java.nio.file.Files; 011import java.nio.file.Path; 012import java.util.List; 013import java.util.stream.Collectors; 014import java.util.zip.GZIPOutputStream; 015 016import org.apache.commons.compress.archivers.ArchiveEntry; 017import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; 018import org.apache.commons.compress.utils.IOUtils; 019 020/** 021 * Serialize a BagIt bag to be a tar+gzip archive. 022 * 023 * @author mikejritter 024 * @since 2020-02-24 025 */ 026public class TarGzBagSerializer implements BagSerializer { 027 private final String extension = ".tar.gz"; 028 029 @Override 030 public Path serialize(final Path root) throws IOException { 031 final Path parent = root.getParent().toAbsolutePath(); 032 final String bagName = root.getFileName().toString(); 033 034 final Path serializedBag = parent.resolve(bagName + extension); 035 try(final OutputStream os = Files.newOutputStream(serializedBag); 036 final GZIPOutputStream gzip = new GZIPOutputStream(os); 037 final TarArchiveOutputStream tar = new TarArchiveOutputStream(gzip)) { 038 tar.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX); 039 tar.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); 040 final List<Path> files = Files.walk(root).collect(Collectors.toList()); 041 for (Path bagEntry : files) { 042 final String name = parent.relativize(bagEntry).toString(); 043 final ArchiveEntry entry = tar.createArchiveEntry(bagEntry.toFile(), name); 044 tar.putArchiveEntry(entry); 045 if (bagEntry.toFile().isFile()) { 046 try (InputStream inputStream = Files.newInputStream(bagEntry)) { 047 IOUtils.copy(inputStream, tar); 048 } 049 } 050 tar.closeArchiveEntry(); 051 } 052 } 053 054 return serializedBag; 055 } 056}