001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.persistence.ocfl.impl;
007
008import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS;
009import static org.apache.jena.riot.RDFFormat.NTRIPLES;
010
011import javax.sql.DataSource;
012
013import java.io.IOException;
014import java.nio.file.FileAlreadyExistsException;
015import java.nio.file.Files;
016import java.nio.file.Path;
017import java.util.function.Consumer;
018
019import com.fasterxml.jackson.annotation.JsonInclude;
020import com.fasterxml.jackson.databind.ObjectMapper;
021import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
022import io.ocfl.api.DigestAlgorithmRegistry;
023import io.ocfl.api.MutableOcflRepository;
024import io.ocfl.api.model.DigestAlgorithm;
025import io.ocfl.api.model.OcflVersion;
026import io.ocfl.aws.OcflS3Client;
027import io.ocfl.core.OcflRepositoryBuilder;
028import io.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
029import io.ocfl.core.path.constraint.ContentPathConstraints;
030import io.ocfl.core.path.mapper.LogicalPathMappers;
031import io.ocfl.core.storage.OcflStorageBuilder;
032import org.apache.commons.lang3.SystemUtils;
033import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException;
034import org.apache.jena.riot.RDFFormat;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037import software.amazon.awssdk.services.s3.S3AsyncClient;
038import software.amazon.awssdk.transfer.s3.S3TransferManager;
039
040/**
041 * A set of utility functions for supporting OCFL persistence activities.
042 *
043 * @author dbernstein
044 * @since 6.0.0
045 */
046public class OcflPersistentStorageUtils {
047
048    private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class);
049
050    private OcflPersistentStorageUtils() {
051    }
052
053    /**
054     * The version of OCFL the repository is configured to use.
055     */
056    private static final OcflVersion OCFL_VERSION = OcflVersion.OCFL_1_1;
057
058    /**
059     * The default RDF on disk format
060     * TODO Make this value configurable
061     */
062    private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES;
063
064    /**
065     * @return the RDF Format. By default NTRIPLES are returned.
066     */
067    public static RDFFormat getRdfFormat() {
068        return DEFAULT_RDF_FORMAT;
069    }
070
071    /**
072     * @return the RDF file extension.
073     */
074    public static String getRDFFileExtension() {
075        return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0);
076    }
077
078    /**
079     * Create a new ocfl repository backed by the filesystem
080     * @param ocflStorageRootDir The ocfl storage root directory
081     * @param ocflWorkDir The ocfl work directory
082     * @param algorithm the algorithm for the OCFL repository
083     * @param ocflUpgradeOnWrite true if we want to write new versions on older objects.
084     * @param verifyInventory true if we should verify the inventory
085     * @return the repository
086     */
087    public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir,
088                                                                   final Path ocflWorkDir,
089                                                                   final org.fcrepo.config.DigestAlgorithm algorithm,
090                                                                   final boolean ocflUpgradeOnWrite,
091                                                                   final boolean verifyInventory)
092            throws IOException {
093        createDirectories(ocflStorageRootDir);
094
095        final var storage = OcflStorageBuilder.builder()
096                                              .verifyInventoryDigest(verifyInventory)
097                                              .fileSystem(ocflStorageRootDir).build();
098
099        return createRepository(ocflWorkDir, builder -> {
100            builder.storage(storage);
101        }, algorithm, ocflUpgradeOnWrite);
102    }
103
104    /**
105     * Create a new ocfl repository backed by s3
106     *
107     * @param dataSource the datasource to keep inventories in and use as a lock
108     * @param s3Client aws s3 async client
109     * @param s3CrtClient aws CRT async client
110     * @param bucket the bucket to store objects in
111     * @param prefix the prefix within the bucket to store objects under
112     * @param ocflWorkDir the local directory to stage objects in
113     * @param algorithm the algorithm for the OCFL repository
114     * @param withDb true if the ocfl client should use a db
115     * @param ocflUpgradeOnWrite true if we want to write new versions on older objects.
116     * @param verifyInventory true if we should verify the ocfl inventory
117     * @return the repository
118     */
119    public static MutableOcflRepository createS3Repository(final DataSource dataSource,
120                                                           final S3AsyncClient s3Client,
121                                                           final S3AsyncClient s3CrtClient,
122                                                           final String bucket,
123                                                           final String prefix,
124                                                           final Path ocflWorkDir,
125                                                           final org.fcrepo.config.DigestAlgorithm algorithm,
126                                                           final boolean withDb,
127                                                           final boolean ocflUpgradeOnWrite,
128                                                           final boolean verifyInventory)
129            throws IOException {
130        createDirectories(ocflWorkDir);
131
132        final var transferManager = S3TransferManager.builder()
133                .s3Client(s3CrtClient).build();
134
135        final var storage = OcflStorageBuilder.builder()
136            .verifyInventoryDigest(verifyInventory)
137            .cloud(OcflS3Client.builder()
138                .transferManager(transferManager)
139                .s3Client(s3Client)
140                .bucket(bucket)
141                .repoPrefix(prefix)
142                .build())
143                .build();
144
145        return createRepository(ocflWorkDir, builder -> {
146            builder.contentPathConstraints(ContentPathConstraints.cloud())
147                    .storage(storage);
148
149            if (withDb) {
150                builder.objectDetailsDb(db -> db.dataSource(dataSource));
151            }
152
153        }, algorithm, ocflUpgradeOnWrite);
154    }
155
156    private static MutableOcflRepository createRepository(final Path ocflWorkDir,
157                                                          final Consumer<OcflRepositoryBuilder> configurer,
158                                                          final org.fcrepo.config.DigestAlgorithm algorithm,
159                                                          final boolean ocflUpgradeOnWrite)
160            throws IOException {
161        createDirectories(ocflWorkDir);
162
163        final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm);
164        if (ocflDigestAlg == null) {
165            throw new UnsupportedDigestAlgorithmException(
166                    "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL");
167        }
168
169        final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ?
170                LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper();
171
172        final var builder = new OcflRepositoryBuilder()
173                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
174                .ocflConfig(config -> config.setDefaultDigestAlgorithm(ocflDigestAlg)
175                        .setOcflVersion(OCFL_VERSION)
176                        .setUpgradeObjectsOnWrite(ocflUpgradeOnWrite))
177                .logicalPathMapper(logicalPathMapper)
178                .workDir(ocflWorkDir);
179
180        configurer.accept(builder);
181
182        return builder.buildMutable();
183    }
184
185    /**
186     * @return new object mapper with default config
187     */
188    public static ObjectMapper objectMapper() {
189        return new ObjectMapper()
190                .configure(WRITE_DATES_AS_TIMESTAMPS, false)
191                .registerModule(new JavaTimeModule())
192                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
193    }
194
195    /**
196     * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm
197     *
198     * @param fcrepoAlg fedora digest algorithm
199     * @return OCFL client DigestAlgorithm, or null if no match could be made
200     */
201    public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) {
202        return fcrepoAlg.getAliases().stream()
203                .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias))
204                .filter(alg -> alg != null)
205                .findFirst()
206                .orElse(null);
207    }
208
209    private static Path createDirectories(final Path path) throws IOException {
210        try {
211            return Files.createDirectories(path);
212        } catch (final FileAlreadyExistsException e) {
213            // Ignore. This only happens with the path is a symlink
214            return path;
215        }
216    }
217}