/*
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree.
 */
package org.fcrepo.persistence.ocfl.impl;

import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS;
import static org.apache.jena.riot.RDFFormat.NTRIPLES;

import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import java.util.function.Consumer;

import javax.sql.DataSource;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
import edu.wisc.library.ocfl.api.MutableOcflRepository;
import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
import edu.wisc.library.ocfl.api.model.OcflVersion;
import edu.wisc.library.ocfl.aws.OcflS3Client;
import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints;
import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers;
import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder;
import org.apache.commons.lang3.SystemUtils;
import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException;
import org.apache.jena.riot.RDFFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.s3.S3Client;

/**
 * A set of utility functions for supporting OCFL persistence activities.
 *
 * @author dbernstein
 * @since 6.0.0
 */
public class OcflPersistentStorageUtils {

    private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class);

    /**
     * The version of OCFL the repository is configured to use.
     */
    private static final OcflVersion OCFL_VERSION = OcflVersion.OCFL_1_1;

    /**
     * The default RDF on disk format
     * TODO Make this value configurable
     */
    private static final RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES;

    private OcflPersistentStorageUtils() {
        // Static utility class; not meant to be instantiated.
    }

    /**
     * @return the RDF Format. By default NTRIPLES are returned.
     */
    public static RDFFormat getRdfFormat() {
        return DEFAULT_RDF_FORMAT;
    }

    /**
     * @return the RDF file extension: a dot followed by the first file extension registered for the
     *         language of the default RDF format.
     */
    public static String getRDFFileExtension() {
        return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0);
    }

    /**
     * Create a new ocfl repository backed by the filesystem
     *
     * @param ocflStorageRootDir The ocfl storage root directory
     * @param ocflWorkDir The ocfl work directory
     * @param algorithm the algorithm for the OCFL repository
     * @param ocflUpgradeOnWrite true if we want to write new versions on older objects.
     * @param verifyInventory true if we should verify the inventory
     * @return the repository
     * @throws IOException if the storage root or work directory cannot be created
     * @throws UnsupportedDigestAlgorithmException if the algorithm has no OCFL equivalent
     */
    public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir,
                                                                   final Path ocflWorkDir,
                                                                   final org.fcrepo.config.DigestAlgorithm algorithm,
                                                                   final boolean ocflUpgradeOnWrite,
                                                                   final boolean verifyInventory)
            throws IOException {
        // The storage root has to exist before the filesystem storage is built on top of it.
        createDirectories(ocflStorageRootDir);

        final var storage = OcflStorageBuilder.builder()
                .verifyInventoryDigest(verifyInventory)
                .fileSystem(ocflStorageRootDir)
                .build();

        return createRepository(ocflWorkDir, builder -> builder.storage(storage), algorithm, ocflUpgradeOnWrite);
    }

    /**
     * Create a new ocfl repository backed by s3
     *
     * @param dataSource the datasource to keep inventories in and use as a lock
     * @param s3Client aws s3 client
     * @param bucket the bucket to store objects in
     * @param prefix the prefix within the bucket to store objects under
     * @param ocflWorkDir the local directory to stage objects in
     * @param algorithm the algorithm for the OCFL repository
     * @param withDb true if the ocfl client should use a db
     * @param ocflUpgradeOnWrite true if we want to write new versions on older objects.
     * @param verifyInventory true if we should verify the ocfl inventory
     * @return the repository
     * @throws IOException if the work directory cannot be created
     * @throws UnsupportedDigestAlgorithmException if the algorithm has no OCFL equivalent
     */
    public static MutableOcflRepository createS3Repository(final DataSource dataSource,
                                                           final S3Client s3Client,
                                                           final String bucket,
                                                           final String prefix,
                                                           final Path ocflWorkDir,
                                                           final org.fcrepo.config.DigestAlgorithm algorithm,
                                                           final boolean withDb,
                                                           final boolean ocflUpgradeOnWrite,
                                                           final boolean verifyInventory)
            throws IOException {
        // createRepository also ensures this directory exists; doing it here first surfaces a
        // local filesystem problem before the S3-backed storage is assembled.
        createDirectories(ocflWorkDir);

        final var s3 = OcflS3Client.builder()
                .s3Client(s3Client)
                .bucket(bucket)
                .repoPrefix(prefix)
                .build();

        final var storage = OcflStorageBuilder.builder()
                .verifyInventoryDigest(verifyInventory)
                .cloud(s3)
                .build();

        return createRepository(ocflWorkDir, builder -> {
            builder.contentPathConstraints(ContentPathConstraints.cloud())
                    .storage(storage);

            // The object details database is only wired in when explicitly requested.
            if (withDb) {
                builder.objectDetailsDb(db -> db.dataSource(dataSource));
            }
        }, algorithm, ocflUpgradeOnWrite);
    }

    /**
     * Builds a mutable OCFL repository with the settings shared by every storage backend and lets
     * the caller contribute the backend specific parts.
     *
     * @param ocflWorkDir the local work directory; created if it does not exist
     * @param configurer callback that applies the storage specific configuration to the builder
     * @param algorithm the Fedora digest algorithm to use as the OCFL default digest
     * @param ocflUpgradeOnWrite true if we want to write new versions on older objects.
     * @return the repository
     * @throws IOException if the work directory cannot be created
     * @throws UnsupportedDigestAlgorithmException if the algorithm has no OCFL equivalent
     */
    private static MutableOcflRepository createRepository(final Path ocflWorkDir,
                                                          final Consumer<OcflRepositoryBuilder> configurer,
                                                          final org.fcrepo.config.DigestAlgorithm algorithm,
                                                          final boolean ocflUpgradeOnWrite)
            throws IOException {
        createDirectories(ocflWorkDir);

        final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm);
        if (ocflDigestAlg == null) {
            throw new UnsupportedDigestAlgorithmException(
                    "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL");
        }

        // Logical paths are percent-encoded with the mapper matching the host operating system.
        final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ?
                LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper();

        final var builder = new OcflRepositoryBuilder()
                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
                .ocflConfig(config -> config.setDefaultDigestAlgorithm(ocflDigestAlg)
                        .setOcflVersion(OCFL_VERSION)
                        .setUpgradeObjectsOnWrite(ocflUpgradeOnWrite))
                .logicalPathMapper(logicalPathMapper)
                .workDir(ocflWorkDir);

        // Backend specific settings are applied last, on top of the shared defaults.
        configurer.accept(builder);

        return builder.buildMutable();
    }

    /**
     * @return new object mapper with default config: dates are not written as timestamps, the
     *         java.time module is registered and null values are left out of the output
     */
    public static ObjectMapper objectMapper() {
        return new ObjectMapper()
                .configure(WRITE_DATES_AS_TIMESTAMPS, false)
                .registerModule(new JavaTimeModule())
                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
    }

    /**
     * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm
     *
     * @param fcrepoAlg fedora digest algorithm, may be null
     * @return OCFL client DigestAlgorithm for the first alias the OCFL registry recognises, or null
     *         if no match could be made (including when {@code fcrepoAlg} is null)
     */
    public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) {
        // A missing algorithm is reported as "no match" so callers hit their own, more
        // descriptive, error handling instead of a NullPointerException.
        if (fcrepoAlg == null) {
            return null;
        }

        return fcrepoAlg.getAliases().stream()
                .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias))
                .filter(Objects::nonNull)
                .findFirst()
                .orElse(null);
    }

    /**
     * Creates the directory and any missing parents, tolerating a path that already exists as a
     * symlink to a directory.
     *
     * @param path the directory to create
     * @return the created directory, or {@code path} itself when it already resolves to a directory
     * @throws IOException if the directory cannot be created, or if the path already exists and
     *         does not resolve to a directory
     */
    private static Path createDirectories(final Path path) throws IOException {
        try {
            return Files.createDirectories(path);
        } catch (final FileAlreadyExistsException e) {
            // This happens when the path is a symlink. That is fine as long as the link
            // resolves to a directory (Files.isDirectory follows links).
            if (Files.isDirectory(path)) {
                return path;
            }
            // Anything else, e.g. a regular file, is not usable as a directory.
            throw e;
        }
    }
}