/*
 * The contents of this file are subject to the license and copyright detailed
 * in the LICENSE and NOTICE files at the root of the source tree.
 */
package org.duraspace.bagit;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.tika.Tika;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Support class to retrieve {@link BagDeserializer}s and {@link BagSerializer}s from a mime type
 *
 * @author mikejritter
 * @since 2020-02-11
 */
public class SerializationSupport {

    private static final Logger logger = LoggerFactory.getLogger(SerializationSupport.class);

    // zip
    private static final String APPLICATION_ZIP = "application/zip";

    // tar + gtar
    private static final String APPLICATION_TAR = "application/tar";
    private static final String APPLICATION_GTAR = "application/gtar";
    private static final String APPLICATION_X_TAR = "application/x-tar";
    private static final String APPLICATION_X_GTAR = "application/x-gtar";

    // gzip
    private static final String APPLICATION_GZIP = "application/gzip";
    private static final String APPLICATION_X_GZIP = "application/x-gzip";
    private static final String APPLICATION_X_COMPRESSED_TAR = "application/x-compressed-tar";

    /** Content types handled by the zip (de)serializers. Unmodifiable. */
    public static final Set<String> ZIP_TYPES = Collections.singleton(APPLICATION_ZIP);

    /** Content types handled by the tar (de)serializers. Unmodifiable. */
    public static final Set<String> TAR_TYPES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
        APPLICATION_TAR, APPLICATION_X_TAR, APPLICATION_GTAR, APPLICATION_X_GTAR)));

    /** Content types handled by the tar+gzip (de)serializers. Unmodifiable. */
    public static final Set<String> GZIP_TYPES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
        APPLICATION_GZIP, APPLICATION_X_GZIP, APPLICATION_X_COMPRESSED_TAR)));

    /**
     * The commonTypeMap acts as a way to coerce various types onto a single format. E.g. handling application/gtar and
     * application/tar will go through the same class, so we map application/gtar to application/tar.
     */
    private static final Map<String, String> commonTypeMap = initCommonTypeMapping();

    private SerializationSupport() {
    }

    /**
     * Build the mapping used to populate {@code commonTypeMap}. Keys are either short names (zip, tar, tgz, gzip,
     * tar+gz) or full content types; values are the content type they are normalized to.
     *
     * @return an unmodifiable map of supported application types
     */
    private static Map<String, String> initCommonTypeMapping() {
        final Map<String, String> mapping = new HashMap<>();
        mapping.put("zip", APPLICATION_ZIP);
        mapping.put(APPLICATION_ZIP, APPLICATION_ZIP);

        mapping.put("tar", APPLICATION_TAR);
        mapping.put(APPLICATION_TAR, APPLICATION_TAR);
        mapping.put(APPLICATION_GTAR, APPLICATION_TAR);
        // NOTE(review): the x- variants normalize to application/x-tar rather than application/tar, so a profile
        // must list application/x-tar to accept them - confirm this is intended before changing
        mapping.put(APPLICATION_X_TAR, APPLICATION_X_TAR);
        mapping.put(APPLICATION_X_GTAR, APPLICATION_X_TAR);

        mapping.put("tgz", APPLICATION_GZIP);
        mapping.put("gzip", APPLICATION_GZIP);
        mapping.put("tar+gz", APPLICATION_GZIP);
        mapping.put(APPLICATION_GZIP, APPLICATION_GZIP);
        mapping.put(APPLICATION_X_GZIP, APPLICATION_GZIP);
        mapping.put(APPLICATION_X_COMPRESSED_TAR, APPLICATION_GZIP);
        return Collections.unmodifiableMap(mapping);
    }

    /**
     * Get a {@link BagDeserializer} for a given content type. Currently supported are:
     * zip ({@link SerializationSupport#ZIP_TYPES}) - {@link ZipBagDeserializer}
     * tar ({@link SerializationSupport#TAR_TYPES}) - {@link TarBagDeserializer}
     * tar+gz ({@link SerializationSupport#GZIP_TYPES}) - {@link GZipBagDeserializer}
     *
     * @param serializedBag the Bag (still serialized) to get a {@link BagDeserializer} for
     * @param profile the {@link BagProfile} to ensure that the content type is allowed
     * @return the {@link BagDeserializer}
     * @throws UnsupportedOperationException if the content type is allowed by the profile but not supported
     * @throws RuntimeException if the content type can not be detected or the {@link BagProfile} does not allow
     *         the detected content type
     */
    public static BagDeserializer deserializerFor(final Path serializedBag, final BagProfile profile) {
        final Tika tika = new Tika();
        final String contentType;

        try {
            // use a less strict approach to handling content types through the commonTypeMap
            final String detectedType = tika.detect(serializedBag);
            contentType = commonTypeMap.getOrDefault(detectedType, detectedType);
            logger.debug("{}: {}", serializedBag, contentType);
        } catch (IOException e) {
            logger.error("Unable to get content type for {}", serializedBag);
            throw new RuntimeException("Unable to get content type for " + serializedBag, e);
        }

        if (!profile.getAcceptedSerializations().contains(contentType)) {
            throw new RuntimeException(notAllowedMessage(contentType, profile));
        }

        if (ZIP_TYPES.contains(contentType)) {
            return new ZipBagDeserializer();
        } else if (TAR_TYPES.contains(contentType)) {
            return new TarBagDeserializer();
        } else if (GZIP_TYPES.contains(contentType)) {
            return new GZipBagDeserializer(profile);
        }

        throw new UnsupportedOperationException("Unsupported content type " + contentType);
    }

    /**
     * Get a {@link BagSerializer} for a given content type and {@link BagProfile}. It takes both a short form (zip,
     * tar, gzip) and long form (application/zip, application/tar) version for the content type.
     *
     * @param contentType the content type to get a {@link BagSerializer} for
     * @param profile the {@link BagProfile} used for validating the {@code contentType}
     * @return the {@link BagSerializer}
     * @throws UnsupportedOperationException if the {@code contentType} is allowed by the profile but not supported
     * @throws RuntimeException if the {@link BagProfile} does not allow the {@code contentType}
     */
    public static BagSerializer serializerFor(final String contentType, final BagProfile profile) {
        final String type = commonTypeMap.getOrDefault(contentType, contentType);
        if (!profile.getAcceptedSerializations().contains(type)) {
            throw new RuntimeException(notAllowedMessage(type, profile));
        }

        if (ZIP_TYPES.contains(type)) {
            return new ZipBagSerializer();
        } else if (TAR_TYPES.contains(type)) {
            return new TarBagSerializer();
        } else if (GZIP_TYPES.contains(type)) {
            return new TarGzBagSerializer();
        }

        // the profile accepts the type but there is no serializer for it; report that instead of blaming the profile
        throw new UnsupportedOperationException("Unsupported content type " + type);
    }

    /**
     * Create the error message used when a {@link BagProfile} does not accept a content type.
     *
     * @param type the content type which was rejected
     * @param profile the {@link BagProfile} which rejected the {@code type}
     * @return the message listing the rejected type and the serializations accepted by the profile
     */
    private static String notAllowedMessage(final String type, final BagProfile profile) {
        // todo: format list correctly
        return "BagProfile does not allow " + type + ". Accepted serializations are:\n" +
               profile.getAcceptedSerializations();
    }

}