001/* 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 */ 006package org.fcrepo.persistence.ocfl.impl; 007 008import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS; 009import static org.apache.jena.riot.RDFFormat.NTRIPLES; 010 011import javax.sql.DataSource; 012 013import java.io.IOException; 014import java.nio.file.FileAlreadyExistsException; 015import java.nio.file.Files; 016import java.nio.file.Path; 017import java.util.function.Consumer; 018 019import com.fasterxml.jackson.annotation.JsonInclude; 020import com.fasterxml.jackson.databind.ObjectMapper; 021import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; 022import io.ocfl.api.DigestAlgorithmRegistry; 023import io.ocfl.api.MutableOcflRepository; 024import io.ocfl.api.model.DigestAlgorithm; 025import io.ocfl.api.model.OcflVersion; 026import io.ocfl.aws.OcflS3Client; 027import io.ocfl.core.OcflRepositoryBuilder; 028import io.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig; 029import io.ocfl.core.path.constraint.ContentPathConstraints; 030import io.ocfl.core.path.mapper.LogicalPathMappers; 031import io.ocfl.core.storage.OcflStorageBuilder; 032import org.apache.commons.lang3.SystemUtils; 033import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException; 034import org.apache.jena.riot.RDFFormat; 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037import software.amazon.awssdk.services.s3.S3AsyncClient; 038import software.amazon.awssdk.transfer.s3.S3TransferManager; 039 040/** 041 * A set of utility functions for supporting OCFL persistence activities. 042 * 043 * @author dbernstein 044 * @since 6.0.0 045 */ 046public class OcflPersistentStorageUtils { 047 048 private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class); 049 050 private OcflPersistentStorageUtils() { 051 } 052 053 /** 054 * The version of OCFL the repository is configured to use. 055 */ 056 private static final OcflVersion OCFL_VERSION = OcflVersion.OCFL_1_1; 057 058 /** 059 * The default RDF on disk format 060 * TODO Make this value configurable 061 */ 062 private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES; 063 064 /** 065 * @return the RDF Format. By default NTRIPLES are returned. 066 */ 067 public static RDFFormat getRdfFormat() { 068 return DEFAULT_RDF_FORMAT; 069 } 070 071 /** 072 * @return the RDF file extension. 073 */ 074 public static String getRDFFileExtension() { 075 return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0); 076 } 077 078 /** 079 * Create a new ocfl repository backed by the filesystem 080 * @param ocflStorageRootDir The ocfl storage root directory 081 * @param ocflWorkDir The ocfl work directory 082 * @param algorithm the algorithm for the OCFL repository 083 * @param ocflUpgradeOnWrite true if we want to write new versions on older objects. 084 * @param verifyInventory true if we should verify the inventory 085 * @return the repository 086 */ 087 public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir, 088 final Path ocflWorkDir, 089 final org.fcrepo.config.DigestAlgorithm algorithm, 090 final boolean ocflUpgradeOnWrite, 091 final boolean verifyInventory) 092 throws IOException { 093 createDirectories(ocflStorageRootDir); 094 095 final var storage = OcflStorageBuilder.builder() 096 .verifyInventoryDigest(verifyInventory) 097 .fileSystem(ocflStorageRootDir).build(); 098 099 return createRepository(ocflWorkDir, builder -> { 100 builder.storage(storage); 101 }, algorithm, ocflUpgradeOnWrite); 102 } 103 104 /** 105 * Create a new ocfl repository backed by s3 106 * 107 * @param dataSource the datasource to keep inventories in and use as a lock 108 * @param s3Client aws s3 async client 109 * @param s3CrtClient aws CRT async client 110 * @param bucket the bucket to store objects in 111 * @param prefix the prefix within the bucket to store objects under 112 * @param ocflWorkDir the local directory to stage objects in 113 * @param algorithm the algorithm for the OCFL repository 114 * @param withDb true if the ocfl client should use a db 115 * @param ocflUpgradeOnWrite true if we want to write new versions on older objects. 116 * @param verifyInventory true if we should verify the ocfl inventory 117 * @return the repository 118 */ 119 public static MutableOcflRepository createS3Repository(final DataSource dataSource, 120 final S3AsyncClient s3Client, 121 final S3AsyncClient s3CrtClient, 122 final String bucket, 123 final String prefix, 124 final Path ocflWorkDir, 125 final org.fcrepo.config.DigestAlgorithm algorithm, 126 final boolean withDb, 127 final boolean ocflUpgradeOnWrite, 128 final boolean verifyInventory) 129 throws IOException { 130 createDirectories(ocflWorkDir); 131 132 final var transferManager = S3TransferManager.builder() 133 .s3Client(s3CrtClient).build(); 134 135 final var storage = OcflStorageBuilder.builder() 136 .verifyInventoryDigest(verifyInventory) 137 .cloud(OcflS3Client.builder() 138 .transferManager(transferManager) 139 .s3Client(s3Client) 140 .bucket(bucket) 141 .repoPrefix(prefix) 142 .build()) 143 .build(); 144 145 return createRepository(ocflWorkDir, builder -> { 146 builder.contentPathConstraints(ContentPathConstraints.cloud()) 147 .storage(storage); 148 149 if (withDb) { 150 builder.objectDetailsDb(db -> db.dataSource(dataSource)); 151 } 152 153 }, algorithm, ocflUpgradeOnWrite); 154 } 155 156 private static MutableOcflRepository createRepository(final Path ocflWorkDir, 157 final Consumer<OcflRepositoryBuilder> configurer, 158 final org.fcrepo.config.DigestAlgorithm algorithm, 159 final boolean ocflUpgradeOnWrite) 160 throws IOException { 161 createDirectories(ocflWorkDir); 162 163 final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm); 164 if (ocflDigestAlg == null) { 165 throw new UnsupportedDigestAlgorithmException( 166 "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL"); 167 } 168 169 final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ? 170 LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper(); 171 172 final var builder = new OcflRepositoryBuilder() 173 .defaultLayoutConfig(new HashedNTupleLayoutConfig()) 174 .ocflConfig(config -> config.setDefaultDigestAlgorithm(ocflDigestAlg) 175 .setOcflVersion(OCFL_VERSION) 176 .setUpgradeObjectsOnWrite(ocflUpgradeOnWrite)) 177 .logicalPathMapper(logicalPathMapper) 178 .workDir(ocflWorkDir); 179 180 configurer.accept(builder); 181 182 return builder.buildMutable(); 183 } 184 185 /** 186 * @return new object mapper with default config 187 */ 188 public static ObjectMapper objectMapper() { 189 return new ObjectMapper() 190 .configure(WRITE_DATES_AS_TIMESTAMPS, false) 191 .registerModule(new JavaTimeModule()) 192 .setSerializationInclusion(JsonInclude.Include.NON_NULL); 193 } 194 195 /** 196 * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm 197 * 198 * @param fcrepoAlg fedora digest algorithm 199 * @return OCFL client DigestAlgorithm, or null if no match could be made 200 */ 201 public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) { 202 return fcrepoAlg.getAliases().stream() 203 .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias)) 204 .filter(alg -> alg != null) 205 .findFirst() 206 .orElse(null); 207 } 208 209 private static Path createDirectories(final Path path) throws IOException { 210 try { 211 return Files.createDirectories(path); 212 } catch (final FileAlreadyExistsException e) { 213 // Ignore. This only happens with the path is a symlink 214 return path; 215 } 216 } 217}