001/* 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 */ 006package org.fcrepo.persistence.ocfl.impl; 007 008import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS; 009import static org.apache.jena.riot.RDFFormat.NTRIPLES; 010 011import java.io.IOException; 012import java.nio.file.FileAlreadyExistsException; 013import java.nio.file.Files; 014import java.nio.file.Path; 015import java.util.function.Consumer; 016 017import javax.sql.DataSource; 018 019import com.fasterxml.jackson.annotation.JsonInclude; 020import com.fasterxml.jackson.databind.ObjectMapper; 021import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; 022import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry; 023import edu.wisc.library.ocfl.api.MutableOcflRepository; 024import edu.wisc.library.ocfl.api.OcflConfig; 025import edu.wisc.library.ocfl.api.model.DigestAlgorithm; 026import edu.wisc.library.ocfl.aws.OcflS3Client; 027import edu.wisc.library.ocfl.core.OcflRepositoryBuilder; 028import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig; 029import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints; 030import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers; 031import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder; 032import org.apache.commons.lang3.SystemUtils; 033import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException; 034import org.apache.jena.riot.RDFFormat; 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037import software.amazon.awssdk.services.s3.S3Client; 038 039/** 040 * A set of utility functions for supporting OCFL persistence activities. 041 * 042 * @author dbernstein 043 * @since 6.0.0 044 */ 045public class OcflPersistentStorageUtils { 046 047 private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class); 048 049 private OcflPersistentStorageUtils() { 050 } 051 052 /** 053 * The default RDF on disk format 054 * TODO Make this value configurable 055 */ 056 057 private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES; 058 059 /** 060 * @return the RDF Format. By default NTRIPLES are returned. 061 */ 062 public static RDFFormat getRdfFormat() { 063 return DEFAULT_RDF_FORMAT; 064 } 065 066 /** 067 * @return the RDF file extension. 068 */ 069 public static String getRDFFileExtension() { 070 return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0); 071 } 072 073 /** 074 * Create a new ocfl repository backed by the filesystem 075 * @param ocflStorageRootDir The ocfl storage root directory 076 * @param ocflWorkDir The ocfl work directory 077 * @param algorithm the algorithm for the OCFL repository 078 * @return the repository 079 */ 080 public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir, 081 final Path ocflWorkDir, 082 final org.fcrepo.config.DigestAlgorithm algorithm) 083 throws IOException { 084 createDirectories(ocflStorageRootDir); 085 086 final var storage = OcflStorageBuilder.builder().fileSystem(ocflStorageRootDir).build(); 087 088 return createRepository(ocflWorkDir, builder -> { 089 builder.storage(storage); 090 }, algorithm); 091 } 092 093 /** 094 * Create a new ocfl repository backed by s3 095 * 096 * @param dataSource the datasource to keep inventories in and use as a lock 097 * @param s3Client aws s3 client 098 * @param bucket the bucket to store objects in 099 * @param prefix the prefix within the bucket to store objects under 100 * @param ocflWorkDir the local directory to stage objects in 101 * @param algorithm the algorithm for the OCFL repository 102 * @param withDb true if the ocfl client should use a db 103 * @return the repository 104 */ 105 public static MutableOcflRepository createS3Repository(final DataSource dataSource, 106 final S3Client s3Client, 107 final String bucket, 108 final String prefix, 109 final Path ocflWorkDir, 110 final org.fcrepo.config.DigestAlgorithm algorithm, 111 final boolean withDb) 112 throws IOException { 113 createDirectories(ocflWorkDir); 114 115 final var storage = OcflStorageBuilder.builder().cloud(OcflS3Client.builder() 116 .s3Client(s3Client) 117 .bucket(bucket) 118 .repoPrefix(prefix) 119 .build()) 120 .build(); 121 122 return createRepository(ocflWorkDir, builder -> { 123 builder.contentPathConstraints(ContentPathConstraints.cloud()) 124 .storage(storage); 125 126 if (withDb) { 127 builder.objectDetailsDb(db -> db.dataSource(dataSource)); 128 } 129 130 }, algorithm); 131 } 132 133 private static MutableOcflRepository createRepository(final Path ocflWorkDir, 134 final Consumer<OcflRepositoryBuilder> configurer, 135 final org.fcrepo.config.DigestAlgorithm algorithm) 136 throws IOException { 137 createDirectories(ocflWorkDir); 138 139 final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm); 140 if (ocflDigestAlg == null) { 141 throw new UnsupportedDigestAlgorithmException( 142 "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL"); 143 } 144 145 final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ? 146 LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper(); 147 148 final var builder = new OcflRepositoryBuilder() 149 .defaultLayoutConfig(new HashedNTupleLayoutConfig()) 150 .ocflConfig(new OcflConfig().setDefaultDigestAlgorithm(ocflDigestAlg)) 151 .logicalPathMapper(logicalPathMapper) 152 .workDir(ocflWorkDir); 153 154 configurer.accept(builder); 155 156 return builder.buildMutable(); 157 } 158 159 /** 160 * @return new object mapper with default config 161 */ 162 public static ObjectMapper objectMapper() { 163 return new ObjectMapper() 164 .configure(WRITE_DATES_AS_TIMESTAMPS, false) 165 .registerModule(new JavaTimeModule()) 166 .setSerializationInclusion(JsonInclude.Include.NON_NULL); 167 } 168 169 /** 170 * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm 171 * 172 * @param fcrepoAlg fedora digest algorithm 173 * @return OCFL client DigestAlgorithm, or null if no match could be made 174 */ 175 public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) { 176 return fcrepoAlg.getAliases().stream() 177 .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias)) 178 .filter(alg -> alg != null) 179 .findFirst() 180 .orElse(null); 181 } 182 183 private static Path createDirectories(final Path path) throws IOException { 184 try { 185 return Files.createDirectories(path); 186 } catch (final FileAlreadyExistsException e) { 187 // Ignore. This only happens with the path is a symlink 188 return path; 189 } 190 } 191}