001/*
002 * The contents of this file are subject to the license and copyright detailed
003 * in the LICENSE and NOTICE files at the root of the source tree.
004 */
005package org.duraspace.bagit;
006
007import java.io.IOException;
008import java.nio.file.Path;
009import java.util.Arrays;
010import java.util.Collections;
011import java.util.HashMap;
012import java.util.HashSet;
013import java.util.Map;
014import java.util.Set;
015
016import org.apache.tika.Tika;
017import org.slf4j.Logger;
018import org.slf4j.LoggerFactory;
019
020/**
021 * Support class to retrieve {@link BagDeserializer}s from a mime type
022 *
023 * @author mikejritter
024 * @since 2020-02-11
025 */
026public class SerializationSupport {
027
028    private static final Logger logger = LoggerFactory.getLogger(SerializationSupport.class);
029
030    // zip
031    protected static final String APPLICATION_ZIP = "application/zip";
032
033    // tar + gtar
034    protected static final String APPLICATION_TAR = "application/tar";
035    protected static final String APPLICATION_GTAR = "application/gtar";
036    protected static final String APPLICATION_X_TAR = "application/x-tar";
037    protected static final String APPLICATION_X_GTAR = "application/x-gtar";
038
039    // gzip
040    protected static final String APPLICATION_GZIP = "application/gzip";
041    protected static final String APPLICATION_X_GZIP = "application/x-gzip";
042    protected static final String APPLICATION_X_COMPRESSED_TAR = "application/x-compressed-tar";
043
044    public static final Set<String> ZIP_TYPES = Collections.singleton(APPLICATION_ZIP);
045    public static final Set<String> TAR_TYPES = new HashSet<>(Arrays.asList(APPLICATION_TAR, APPLICATION_X_TAR,
046                                                                            APPLICATION_GTAR, APPLICATION_X_GTAR));
047    public static final Set<String> GZIP_TYPES = new HashSet<>(Arrays.asList(APPLICATION_GZIP, APPLICATION_X_GTAR,
048                                                                             APPLICATION_X_COMPRESSED_TAR));
049
050    /**
051     * The commonTypeMap acts as a way to coerce various types onto a single format. E.g. handing application/gtar and
052     * application/tar will go through the same class, so we map application/gtar to application/tar.
053     */
054    private static Map<String, String> commonTypeMap = initCommonTypeMapping();
055
056    private SerializationSupport() {
057    }
058
059    /**
060     * Just a way to instantiate the {@code commonTypeMap}
061     *
062     * @return the map of supported application types
063     */
064    private static Map<String, String> initCommonTypeMapping() {
065        commonTypeMap = new HashMap<>();
066        commonTypeMap.put("zip", APPLICATION_ZIP);
067        commonTypeMap.put(APPLICATION_ZIP, APPLICATION_ZIP);
068
069        commonTypeMap.put("tar", APPLICATION_TAR);
070        commonTypeMap.put(APPLICATION_TAR, APPLICATION_TAR);
071        commonTypeMap.put(APPLICATION_GTAR, APPLICATION_TAR);
072        commonTypeMap.put(APPLICATION_X_TAR, APPLICATION_TAR);
073        commonTypeMap.put(APPLICATION_X_GTAR, APPLICATION_TAR);
074
075        commonTypeMap.put("tgz", APPLICATION_GZIP);
076        commonTypeMap.put("gzip", APPLICATION_GZIP);
077        commonTypeMap.put("tar+gz", APPLICATION_GZIP);
078        commonTypeMap.put(APPLICATION_GZIP, APPLICATION_GZIP);
079        commonTypeMap.put(APPLICATION_X_GZIP, APPLICATION_GZIP);
080        commonTypeMap.put(APPLICATION_X_COMPRESSED_TAR, APPLICATION_GZIP);
081        return commonTypeMap;
082    }
083
084    /**
085     * Visible for testing only
086     * Retrieve a copy of the commonTypeMap
087     *
088     * @return a copy of the commonTypeMap
089     */
090    protected static Map<String, String> getCommonTypeMap() {
091        return new HashMap<>(commonTypeMap);
092    }
093
094    /**
095     * Get a {@link BagDeserializer} for a given content type. Currently supported are:
096     * zip ({@link SerializationSupport#ZIP_TYPES}) - {@link ZipBagDeserializer}
097     * tar ({@link SerializationSupport#TAR_TYPES}) - {@link TarBagDeserializer}
098     * tar+gz ({@link SerializationSupport#GZIP_TYPES}) - {@link GZipBagDeserializer}
099     *
100     * @param serializedBag the Bag (still serialized) to get a {@link BagDeserializer} for
101     * @param profile the {@link BagProfile} to ensure that the content type is allowed
102     * @return the {@link BagDeserializer}
103     * @throws UnsupportedOperationException if the content type is not supported
104     * @throws RuntimeException if the {@link BagProfile} does not allow serialization
105     */
106    public static BagDeserializer deserializerFor(final Path serializedBag, final BagProfile profile) {
107        final Tika tika = new Tika();
108        final String contentType;
109
110        try {
111            // use a less strict approach to handling content types through the commonTypeMap
112            final String detectedType = tika.detect(serializedBag);
113            contentType = commonTypeMap.getOrDefault(detectedType, detectedType);
114            logger.debug("{}: {}", serializedBag, contentType);
115        } catch (IOException e) {
116            logger.error("Unable to get content type for {}", serializedBag);
117            throw new RuntimeException(e);
118        }
119
120        if (profile.getAcceptedSerializations().contains(contentType)) {
121            if (ZIP_TYPES.contains(contentType)) {
122                return new ZipBagDeserializer();
123            } else if (TAR_TYPES.contains(contentType)) {
124                return new TarBagDeserializer();
125            } else if (GZIP_TYPES.contains(contentType)) {
126                return new GZipBagDeserializer(profile);
127            } else {
128                throw new UnsupportedOperationException("Unsupported content type " + contentType);
129            }
130        }
131
132        throw new RuntimeException("BagProfile does not allow " + contentType + ". Accepted serializations are:\n" +
133                                   profile.getAcceptedSerializations());
134    }
135
136    /**
137     * Get a {@link BagSerializer} for a given content type and {@link BagProfile}. It takes both a short form (zip,
138     * tar, gzip) and long form (application/zip, application/tar) version for the content type.
139     *
140     * @param contentType the content type to get a {@link BagSerializer} for
141     * @param profile the {@link BagProfile} used for validating the {@code contentType}
142     * @return the {@link BagSerializer}
143     * @throws RuntimeException if the {@code contentType} is not supported
144     */
145    public static BagSerializer serializerFor(final String contentType, final BagProfile profile) {
146        final String type = commonTypeMap.getOrDefault(contentType, contentType);
147        if (profile.getAcceptedSerializations().contains(type)) {
148            if (ZIP_TYPES.contains(type)) {
149                return new ZipBagSerializer();
150            } else if (TAR_TYPES.contains(type)) {
151                return new TarBagSerializer();
152            } else if (GZIP_TYPES.contains(type)) {
153                return new TarGzBagSerializer();
154            } else {
155                throw new UnsupportedOperationException("Unsupported content type " + contentType);
156            }
157        }
158
159        throw new RuntimeException("BagProfile does not allow " + type + ". Accepted serializations are:\n" +
160                                   profile.getAcceptedSerializations());
161    }
162
163}