001/* 002 * The contents of this file are subject to the license and copyright detailed 003 * in the LICENSE and NOTICE files at the root of the source tree. 004 */ 005package org.duraspace.bagit; 006 007import java.io.IOException; 008import java.nio.file.Path; 009import java.util.Arrays; 010import java.util.Collections; 011import java.util.HashMap; 012import java.util.HashSet; 013import java.util.Map; 014import java.util.Set; 015 016import org.apache.tika.Tika; 017import org.slf4j.Logger; 018import org.slf4j.LoggerFactory; 019 020/** 021 * Support class to retrieve {@link BagDeserializer}s from a mime type 022 * 023 * @author mikejritter 024 * @since 2020-02-11 025 */ 026public class SerializationSupport { 027 028 private static final Logger logger = LoggerFactory.getLogger(SerializationSupport.class); 029 030 // zip 031 protected static final String APPLICATION_ZIP = "application/zip"; 032 033 // tar + gtar 034 protected static final String APPLICATION_TAR = "application/tar"; 035 protected static final String APPLICATION_GTAR = "application/gtar"; 036 protected static final String APPLICATION_X_TAR = "application/x-tar"; 037 protected static final String APPLICATION_X_GTAR = "application/x-gtar"; 038 039 // gzip 040 protected static final String APPLICATION_GZIP = "application/gzip"; 041 protected static final String APPLICATION_X_GZIP = "application/x-gzip"; 042 protected static final String APPLICATION_X_COMPRESSED_TAR = "application/x-compressed-tar"; 043 044 public static final Set<String> ZIP_TYPES = Collections.singleton(APPLICATION_ZIP); 045 public static final Set<String> TAR_TYPES = new HashSet<>(Arrays.asList(APPLICATION_TAR, APPLICATION_X_TAR, 046 APPLICATION_GTAR, APPLICATION_X_GTAR)); 047 public static final Set<String> GZIP_TYPES = new HashSet<>(Arrays.asList(APPLICATION_GZIP, APPLICATION_X_GTAR, 048 APPLICATION_X_COMPRESSED_TAR)); 049 050 /** 051 * The commonTypeMap acts as a way to coerce various types onto a single format. E.g. handing application/gtar and 052 * application/tar will go through the same class, so we map application/gtar to application/tar. 053 */ 054 private static Map<String, String> commonTypeMap = initCommonTypeMapping(); 055 056 private SerializationSupport() { 057 } 058 059 /** 060 * Just a way to instantiate the {@code commonTypeMap} 061 * 062 * @return the map of supported application types 063 */ 064 private static Map<String, String> initCommonTypeMapping() { 065 commonTypeMap = new HashMap<>(); 066 commonTypeMap.put("zip", APPLICATION_ZIP); 067 commonTypeMap.put(APPLICATION_ZIP, APPLICATION_ZIP); 068 069 commonTypeMap.put("tar", APPLICATION_TAR); 070 commonTypeMap.put(APPLICATION_TAR, APPLICATION_TAR); 071 commonTypeMap.put(APPLICATION_GTAR, APPLICATION_TAR); 072 commonTypeMap.put(APPLICATION_X_TAR, APPLICATION_TAR); 073 commonTypeMap.put(APPLICATION_X_GTAR, APPLICATION_TAR); 074 075 commonTypeMap.put("tgz", APPLICATION_GZIP); 076 commonTypeMap.put("gzip", APPLICATION_GZIP); 077 commonTypeMap.put("tar+gz", APPLICATION_GZIP); 078 commonTypeMap.put(APPLICATION_GZIP, APPLICATION_GZIP); 079 commonTypeMap.put(APPLICATION_X_GZIP, APPLICATION_GZIP); 080 commonTypeMap.put(APPLICATION_X_COMPRESSED_TAR, APPLICATION_GZIP); 081 return commonTypeMap; 082 } 083 084 /** 085 * Visible for testing only 086 * Retrieve a copy of the commonTypeMap 087 * 088 * @return a copy of the commonTypeMap 089 */ 090 protected static Map<String, String> getCommonTypeMap() { 091 return new HashMap<>(commonTypeMap); 092 } 093 094 /** 095 * Get a {@link BagDeserializer} for a given content type. Currently supported are: 096 * zip ({@link SerializationSupport#ZIP_TYPES}) - {@link ZipBagDeserializer} 097 * tar ({@link SerializationSupport#TAR_TYPES}) - {@link TarBagDeserializer} 098 * tar+gz ({@link SerializationSupport#GZIP_TYPES}) - {@link GZipBagDeserializer} 099 * 100 * @param serializedBag the Bag (still serialized) to get a {@link BagDeserializer} for 101 * @param profile the {@link BagProfile} to ensure that the content type is allowed 102 * @return the {@link BagDeserializer} 103 * @throws UnsupportedOperationException if the content type is not supported 104 * @throws RuntimeException if the {@link BagProfile} does not allow serialization 105 */ 106 public static BagDeserializer deserializerFor(final Path serializedBag, final BagProfile profile) { 107 final Tika tika = new Tika(); 108 final String contentType; 109 110 try { 111 // use a less strict approach to handling content types through the commonTypeMap 112 final String detectedType = tika.detect(serializedBag); 113 contentType = commonTypeMap.getOrDefault(detectedType, detectedType); 114 logger.debug("{}: {}", serializedBag, contentType); 115 } catch (IOException e) { 116 logger.error("Unable to get content type for {}", serializedBag); 117 throw new RuntimeException(e); 118 } 119 120 if (profile.getAcceptedSerializations().contains(contentType)) { 121 if (ZIP_TYPES.contains(contentType)) { 122 return new ZipBagDeserializer(); 123 } else if (TAR_TYPES.contains(contentType)) { 124 return new TarBagDeserializer(); 125 } else if (GZIP_TYPES.contains(contentType)) { 126 return new GZipBagDeserializer(profile); 127 } else { 128 throw new UnsupportedOperationException("Unsupported content type " + contentType); 129 } 130 } 131 132 throw new RuntimeException("BagProfile does not allow " + contentType + ". Accepted serializations are:\n" + 133 profile.getAcceptedSerializations()); 134 } 135 136 /** 137 * Get a {@link BagSerializer} for a given content type and {@link BagProfile}. It takes both a short form (zip, 138 * tar, gzip) and long form (application/zip, application/tar) version for the content type. 139 * 140 * @param contentType the content type to get a {@link BagSerializer} for 141 * @param profile the {@link BagProfile} used for validating the {@code contentType} 142 * @return the {@link BagSerializer} 143 * @throws RuntimeException if the {@code contentType} is not supported 144 */ 145 public static BagSerializer serializerFor(final String contentType, final BagProfile profile) { 146 final String type = commonTypeMap.getOrDefault(contentType, contentType); 147 if (profile.getAcceptedSerializations().contains(type)) { 148 if (ZIP_TYPES.contains(type)) { 149 return new ZipBagSerializer(); 150 } else if (TAR_TYPES.contains(type)) { 151 return new TarBagSerializer(); 152 } else if (GZIP_TYPES.contains(type)) { 153 return new TarGzBagSerializer(); 154 } else { 155 throw new UnsupportedOperationException("Unsupported content type " + contentType); 156 } 157 } 158 159 throw new RuntimeException("BagProfile does not allow " + type + ". Accepted serializations are:\n" + 160 profile.getAcceptedSerializations()); 161 } 162 163}