001/* 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 */ 006package org.fcrepo.persistence.ocfl.impl; 007 008import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS; 009import static org.apache.jena.riot.RDFFormat.NTRIPLES; 010 011import java.io.IOException; 012import java.nio.file.FileAlreadyExistsException; 013import java.nio.file.Files; 014import java.nio.file.Path; 015import java.util.function.Consumer; 016 017import javax.sql.DataSource; 018 019import org.apache.commons.lang3.SystemUtils; 020import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException; 021import org.apache.jena.riot.RDFFormat; 022import org.slf4j.Logger; 023import org.slf4j.LoggerFactory; 024 025import com.fasterxml.jackson.annotation.JsonInclude; 026import com.fasterxml.jackson.databind.ObjectMapper; 027import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; 028 029import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry; 030import edu.wisc.library.ocfl.api.MutableOcflRepository; 031import edu.wisc.library.ocfl.api.OcflConfig; 032import edu.wisc.library.ocfl.api.model.DigestAlgorithm; 033import edu.wisc.library.ocfl.aws.OcflS3Client; 034import edu.wisc.library.ocfl.core.OcflRepositoryBuilder; 035import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig; 036import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints; 037import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers; 038import edu.wisc.library.ocfl.core.storage.cloud.CloudOcflStorage; 039import edu.wisc.library.ocfl.core.storage.filesystem.FileSystemOcflStorage; 040import software.amazon.awssdk.services.s3.S3Client; 041 042/** 043 * A set of utility functions for supporting OCFL persistence activities. 044 * 045 * @author dbernstein 046 * @since 6.0.0 047 */ 048public class OcflPersistentStorageUtils { 049 050 private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class); 051 052 private OcflPersistentStorageUtils() { 053 } 054 055 /** 056 * The default RDF on disk format 057 * TODO Make this value configurable 058 */ 059 060 private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES; 061 062 /** 063 * @return the RDF Format. By default NTRIPLES are returned. 064 */ 065 public static RDFFormat getRdfFormat() { 066 return DEFAULT_RDF_FORMAT; 067 } 068 069 /** 070 * @return the RDF file extension. 071 */ 072 public static String getRDFFileExtension() { 073 return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0); 074 } 075 076 /** 077 * Create a new ocfl repository backed by the filesystem 078 * @param ocflStorageRootDir The ocfl storage root directory 079 * @param ocflWorkDir The ocfl work directory 080 * @param algorithm the algorithm for the OCFL repository 081 * @return the repository 082 */ 083 public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir, 084 final Path ocflWorkDir, 085 final org.fcrepo.config.DigestAlgorithm algorithm) 086 throws IOException { 087 createDirectories(ocflStorageRootDir); 088 089 final var storage = FileSystemOcflStorage.builder().repositoryRoot(ocflStorageRootDir).build(); 090 091 return createRepository(ocflWorkDir, builder -> { 092 builder.storage(storage); 093 }, algorithm); 094 } 095 096 /** 097 * Create a new ocfl repository backed by s3 098 * 099 * @param dataSource the datasource to keep inventories in and use as a lock 100 * @param s3Client aws s3 client 101 * @param bucket the bucket to store objects in 102 * @param prefix the prefix within the bucket to store objects under 103 * @param ocflWorkDir the local directory to stage objects in 104 * @param algorithm the algorithm for the OCFL repository 105 * @param withDb true if the ocfl client should use a db 106 * @return the repository 107 */ 108 public static MutableOcflRepository createS3Repository(final DataSource dataSource, 109 final S3Client s3Client, 110 final String bucket, 111 final String prefix, 112 final Path ocflWorkDir, 113 final org.fcrepo.config.DigestAlgorithm algorithm, 114 final boolean withDb) 115 throws IOException { 116 createDirectories(ocflWorkDir); 117 118 final var storage = CloudOcflStorage.builder() 119 .cloudClient(OcflS3Client.builder() 120 .s3Client(s3Client) 121 .bucket(bucket) 122 .repoPrefix(prefix) 123 .build()) 124 .build(); 125 126 return createRepository(ocflWorkDir, builder -> { 127 builder.contentPathConstraints(ContentPathConstraints.cloud()) 128 .storage(storage); 129 130 if (withDb) { 131 builder.objectDetailsDb(db -> db.dataSource(dataSource)); 132 } 133 134 }, algorithm); 135 } 136 137 private static MutableOcflRepository createRepository(final Path ocflWorkDir, 138 final Consumer<OcflRepositoryBuilder> configurer, 139 final org.fcrepo.config.DigestAlgorithm algorithm) 140 throws IOException { 141 createDirectories(ocflWorkDir); 142 143 final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm); 144 if (ocflDigestAlg == null) { 145 throw new UnsupportedDigestAlgorithmException( 146 "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL"); 147 } 148 149 final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ? 150 LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper(); 151 152 final var builder = new OcflRepositoryBuilder() 153 .defaultLayoutConfig(new HashedNTupleLayoutConfig()) 154 .ocflConfig(new OcflConfig().setDefaultDigestAlgorithm(ocflDigestAlg)) 155 .logicalPathMapper(logicalPathMapper) 156 .workDir(ocflWorkDir); 157 158 configurer.accept(builder); 159 160 return builder.buildMutable(); 161 } 162 163 /** 164 * @return new object mapper with default config 165 */ 166 public static ObjectMapper objectMapper() { 167 return new ObjectMapper() 168 .configure(WRITE_DATES_AS_TIMESTAMPS, false) 169 .registerModule(new JavaTimeModule()) 170 .setSerializationInclusion(JsonInclude.Include.NON_NULL); 171 } 172 173 /** 174 * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm 175 * 176 * @param fcrepoAlg fedora digest algorithm 177 * @return OCFL client DigestAlgorithm, or null if no match could be made 178 */ 179 public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) { 180 return fcrepoAlg.getAliases().stream() 181 .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias)) 182 .filter(alg -> alg != null) 183 .findFirst() 184 .orElse(null); 185 } 186 187 private static Path createDirectories(final Path path) throws IOException { 188 try { 189 return Files.createDirectories(path); 190 } catch (final FileAlreadyExistsException e) { 191 // Ignore. This only happens with the path is a symlink 192 return path; 193 } 194 } 195}