001/* 002 * The contents of this file are subject to the license and copyright detailed 003 * in the LICENSE and NOTICE files at the root of the source tree. 004 */ 005package org.duraspace.bagit; 006 007import java.io.IOException; 008import java.nio.file.Files; 009import java.nio.file.Path; 010import java.util.Optional; 011 012import org.apache.commons.compress.archivers.ArchiveEntry; 013import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; 014import org.slf4j.Logger; 015import org.slf4j.LoggerFactory; 016 017/** 018 * Deserializer for {@link gov.loc.repository.bagit.domain.Bag}s serialized using tar 019 * 020 * @author mikejritter 021 * @since 2020-02-11 022 */ 023public class TarBagDeserializer implements BagDeserializer { 024 025 private final Logger logger = LoggerFactory.getLogger(TarBagDeserializer.class); 026 027 protected TarBagDeserializer() { 028 } 029 030 @Override 031 public Path deserialize(final Path root) throws IOException { 032 logger.info("Extracting serialized bag: {}", root.getFileName()); 033 034 final Path parent = root.getParent(); 035 final int rootPathCount = root.getNameCount(); 036 Optional<String> filename = Optional.empty(); 037 try (TarArchiveInputStream tais = new TarArchiveInputStream(Files.newInputStream(root))) { 038 ArchiveEntry entry; 039 while ((entry = tais.getNextEntry()) != null) { 040 final String name = entry.getName(); 041 042 logger.debug("Handling entry {}", entry.getName()); 043 final Path archiveFile = parent.resolve(name); 044 045 if (Files.notExists(archiveFile.getParent())) { 046 Files.createDirectories(archiveFile.getParent()); 047 } 048 049 if (entry.isDirectory()) { 050 Files.createDirectories(archiveFile); 051 if (archiveFile.getNameCount() == rootPathCount) { 052 logger.debug("Archive name is {}", archiveFile.getFileName()); 053 filename = Optional.of(archiveFile.getFileName().toString()); 054 } 055 } else { 056 if (Files.exists(parent.resolve(name))) { 057 logger.warn("File {} already exists!", name); 058 } else { 059 Files.copy(tais, archiveFile); 060 } 061 } 062 } 063 } 064 065 final String extracted = filename.orElseGet(() -> { 066 // get the name from the tarball minus the extension 067 final String rootName = root.getFileName().toString(); 068 final int dotIdx = rootName.lastIndexOf("."); 069 return rootName.substring(0, dotIdx); 070 }); 071 return parent.resolve(extracted); 072 } 073}