001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.persistence.ocfl.impl;
007
008import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS;
009import static org.apache.jena.riot.RDFFormat.NTRIPLES;
010
011import java.io.IOException;
012import java.nio.file.FileAlreadyExistsException;
013import java.nio.file.Files;
014import java.nio.file.Path;
015import java.util.function.Consumer;
016
017import javax.sql.DataSource;
018
019import org.apache.commons.lang3.SystemUtils;
020import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException;
021import org.apache.jena.riot.RDFFormat;
022import org.slf4j.Logger;
023import org.slf4j.LoggerFactory;
024
025import com.fasterxml.jackson.annotation.JsonInclude;
026import com.fasterxml.jackson.databind.ObjectMapper;
027import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
028
029import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
030import edu.wisc.library.ocfl.api.MutableOcflRepository;
031import edu.wisc.library.ocfl.api.OcflConfig;
032import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
033import edu.wisc.library.ocfl.aws.OcflS3Client;
034import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
035import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
036import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints;
037import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers;
038import edu.wisc.library.ocfl.core.storage.cloud.CloudOcflStorage;
039import edu.wisc.library.ocfl.core.storage.filesystem.FileSystemOcflStorage;
040import software.amazon.awssdk.services.s3.S3Client;
041
042/**
043 * A set of utility functions for supporting OCFL persistence activities.
044 *
045 * @author dbernstein
046 * @since 6.0.0
047 */
048public class OcflPersistentStorageUtils {
049
050    private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class);
051
052    private OcflPersistentStorageUtils() {
053    }
054
055    /**
056     * The default RDF on disk format
057     * TODO Make this value configurable
058     */
059
060    private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES;
061
062    /**
063     * @return the RDF Format. By default NTRIPLES are returned.
064     */
065    public static RDFFormat getRdfFormat() {
066        return DEFAULT_RDF_FORMAT;
067    }
068
069    /**
070     * @return the RDF file extension.
071     */
072    public static String getRDFFileExtension() {
073        return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0);
074    }
075
076    /**
077     * Create a new ocfl repository backed by the filesystem
078     * @param ocflStorageRootDir The ocfl storage root directory
079     * @param ocflWorkDir The ocfl work directory
080     * @param algorithm the algorithm for the OCFL repository
081     * @return the repository
082     */
083    public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir,
084                                                                   final Path ocflWorkDir,
085                                                                   final org.fcrepo.config.DigestAlgorithm algorithm)
086            throws IOException {
087        createDirectories(ocflStorageRootDir);
088
089        final var storage = FileSystemOcflStorage.builder().repositoryRoot(ocflStorageRootDir).build();
090
091        return createRepository(ocflWorkDir, builder -> {
092            builder.storage(storage);
093        }, algorithm);
094    }
095
096    /**
097     * Create a new ocfl repository backed by s3
098     *
099     * @param dataSource the datasource to keep inventories in and use as a lock
100     * @param s3Client aws s3 client
101     * @param bucket the bucket to store objects in
102     * @param prefix the prefix within the bucket to store objects under
103     * @param ocflWorkDir the local directory to stage objects in
104     * @param algorithm the algorithm for the OCFL repository
105     * @param withDb true if the ocfl client should use a db
106     * @return the repository
107     */
108    public static MutableOcflRepository createS3Repository(final DataSource dataSource,
109                                                           final S3Client s3Client,
110                                                           final String bucket,
111                                                           final String prefix,
112                                                           final Path ocflWorkDir,
113                                                           final org.fcrepo.config.DigestAlgorithm algorithm,
114                                                           final boolean withDb)
115            throws IOException {
116        createDirectories(ocflWorkDir);
117
118        final var storage = CloudOcflStorage.builder()
119                .cloudClient(OcflS3Client.builder()
120                        .s3Client(s3Client)
121                        .bucket(bucket)
122                        .repoPrefix(prefix)
123                        .build())
124                .build();
125
126        return createRepository(ocflWorkDir, builder -> {
127            builder.contentPathConstraints(ContentPathConstraints.cloud())
128                    .storage(storage);
129
130            if (withDb) {
131                builder.objectDetailsDb(db -> db.dataSource(dataSource));
132            }
133
134        }, algorithm);
135    }
136
137    private static MutableOcflRepository createRepository(final Path ocflWorkDir,
138                                                          final Consumer<OcflRepositoryBuilder> configurer,
139                                                          final org.fcrepo.config.DigestAlgorithm algorithm)
140            throws IOException {
141        createDirectories(ocflWorkDir);
142
143        final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm);
144        if (ocflDigestAlg == null) {
145            throw new UnsupportedDigestAlgorithmException(
146                    "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL");
147        }
148
149        final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ?
150                LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper();
151
152        final var builder = new OcflRepositoryBuilder()
153                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
154                .ocflConfig(new OcflConfig().setDefaultDigestAlgorithm(ocflDigestAlg))
155                .logicalPathMapper(logicalPathMapper)
156                .workDir(ocflWorkDir);
157
158        configurer.accept(builder);
159
160        return builder.buildMutable();
161    }
162
163    /**
164     * @return new object mapper with default config
165     */
166    public static ObjectMapper objectMapper() {
167        return new ObjectMapper()
168                .configure(WRITE_DATES_AS_TIMESTAMPS, false)
169                .registerModule(new JavaTimeModule())
170                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
171    }
172
173    /**
174     * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm
175     *
176     * @param fcrepoAlg fedora digest algorithm
177     * @return OCFL client DigestAlgorithm, or null if no match could be made
178     */
179    public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) {
180        return fcrepoAlg.getAliases().stream()
181                .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias))
182                .filter(alg -> alg != null)
183                .findFirst()
184                .orElse(null);
185    }
186
187    private static Path createDirectories(final Path path) throws IOException {
188        try {
189            return Files.createDirectories(path);
190        } catch (final FileAlreadyExistsException e) {
191            // Ignore. This only happens with the path is a symlink
192            return path;
193        }
194    }
195}