001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
007import java.io.IOException;
008import java.nio.file.Files;
009import java.nio.file.Path;
010import java.util.Optional;
011
012import org.apache.commons.compress.archivers.ArchiveEntry;
013import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
014import org.slf4j.Logger;
015import org.slf4j.LoggerFactory;
016
017/**
018 * Deserializer for {@link gov.loc.repository.bagit.domain.Bag}s serialized using tar
019 *
020 * @author mikejritter
021 * @since 2020-02-11
022 */
023public class TarBagDeserializer implements BagDeserializer {
024
025    private final Logger logger = LoggerFactory.getLogger(TarBagDeserializer.class);
026
027    protected TarBagDeserializer() {
028    }
029
030    @Override
031    public Path deserialize(final Path root) throws IOException {
032        logger.info("Extracting serialized bag: {}", root.getFileName());
033
034        final Path parent = root.getParent();
035        final int rootPathCount = root.getNameCount();
036        Optional<String> filename = Optional.empty();
037        try (TarArchiveInputStream tais = new TarArchiveInputStream(Files.newInputStream(root))) {
038            ArchiveEntry entry;
039            while ((entry = tais.getNextEntry()) != null) {
040                final String name = entry.getName();
041
042                logger.debug("Handling entry {}", entry.getName());
043                final Path archiveFile = parent.resolve(name);
044
045                if (Files.notExists(archiveFile.getParent())) {
046                    Files.createDirectories(archiveFile.getParent());
047                }
048
049                if (entry.isDirectory()) {
050                    Files.createDirectories(archiveFile);
051                    if (archiveFile.getNameCount() == rootPathCount) {
052                        logger.debug("Archive name is {}", archiveFile.getFileName());
053                        filename = Optional.of(archiveFile.getFileName().toString());
054                    }
055                } else {
056                    if (Files.exists(parent.resolve(name))) {
057                        logger.warn("File {} already exists!", name);
058                    } else {
059                        Files.copy(tais, archiveFile);
060                    }
061                }
062            }
063        }
064
065        final String extracted = filename.orElseGet(() -> {
066            // get the name from the tarball minus the extension
067            final String rootName = root.getFileName().toString();
068            final int dotIdx = rootName.lastIndexOf(".");
069            return rootName.substring(0, dotIdx);
070        });
071        return parent.resolve(extracted);
072    }
073}