001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.persistence.ocfl.impl;
007
008import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS;
009import static org.apache.jena.riot.RDFFormat.NTRIPLES;
010
011import java.io.IOException;
012import java.nio.file.FileAlreadyExistsException;
013import java.nio.file.Files;
014import java.nio.file.Path;
015import java.util.function.Consumer;
016
017import javax.sql.DataSource;
018
019import com.fasterxml.jackson.annotation.JsonInclude;
020import com.fasterxml.jackson.databind.ObjectMapper;
021import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
022import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
023import edu.wisc.library.ocfl.api.MutableOcflRepository;
024import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
025import edu.wisc.library.ocfl.api.model.OcflVersion;
026import edu.wisc.library.ocfl.aws.OcflS3Client;
027import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
028import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
029import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints;
030import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers;
031import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder;
032import org.apache.commons.lang3.SystemUtils;
033import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException;
034import org.apache.jena.riot.RDFFormat;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037import software.amazon.awssdk.services.s3.S3Client;
038
039/**
040 * A set of utility functions for supporting OCFL persistence activities.
041 *
042 * @author dbernstein
043 * @since 6.0.0
044 */
045public class OcflPersistentStorageUtils {
046
047    private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class);
048
049    private OcflPersistentStorageUtils() {
050    }
051
052    /**
053     * The version of OCFL the repository is configured to use.
054     */
055    private static final OcflVersion OCFL_VERSION = OcflVersion.OCFL_1_1;
056
057    /**
058     * The default RDF on disk format
059     * TODO Make this value configurable
060     */
061    private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES;
062
063    /**
064     * @return the RDF Format. By default NTRIPLES are returned.
065     */
066    public static RDFFormat getRdfFormat() {
067        return DEFAULT_RDF_FORMAT;
068    }
069
070    /**
071     * @return the RDF file extension.
072     */
073    public static String getRDFFileExtension() {
074        return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0);
075    }
076
077    /**
078     * Create a new ocfl repository backed by the filesystem
079     * @param ocflStorageRootDir The ocfl storage root directory
080     * @param ocflWorkDir The ocfl work directory
081     * @param algorithm the algorithm for the OCFL repository
082     * @param ocflUpgradeOnWrite true if we want to write new versions on older objects.
083     * @param verifyInventory true if we should verify the inventory
084     * @return the repository
085     */
086    public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir,
087                                                                   final Path ocflWorkDir,
088                                                                   final org.fcrepo.config.DigestAlgorithm algorithm,
089                                                                   final boolean ocflUpgradeOnWrite,
090                                                                   final boolean verifyInventory)
091            throws IOException {
092        createDirectories(ocflStorageRootDir);
093
094        final var storage = OcflStorageBuilder.builder()
095                                              .verifyInventoryDigest(verifyInventory)
096                                              .fileSystem(ocflStorageRootDir).build();
097
098        return createRepository(ocflWorkDir, builder -> {
099            builder.storage(storage);
100        }, algorithm, ocflUpgradeOnWrite);
101    }
102
103    /**
104     * Create a new ocfl repository backed by s3
105     *
106     * @param dataSource the datasource to keep inventories in and use as a lock
107     * @param s3Client aws s3 client
108     * @param bucket the bucket to store objects in
109     * @param prefix the prefix within the bucket to store objects under
110     * @param ocflWorkDir the local directory to stage objects in
111     * @param algorithm the algorithm for the OCFL repository
112     * @param withDb true if the ocfl client should use a db
113     * @param ocflUpgradeOnWrite true if we want to write new versions on older objects.
114     * @param verifyInventory true if we should verify the ocfl inventory
115     * @return the repository
116     */
117    public static MutableOcflRepository createS3Repository(final DataSource dataSource,
118                                                           final S3Client s3Client,
119                                                           final String bucket,
120                                                           final String prefix,
121                                                           final Path ocflWorkDir,
122                                                           final org.fcrepo.config.DigestAlgorithm algorithm,
123                                                           final boolean withDb,
124                                                           final boolean ocflUpgradeOnWrite,
125                                                           final boolean verifyInventory)
126            throws IOException {
127        createDirectories(ocflWorkDir);
128
129        final var storage = OcflStorageBuilder.builder()
130            .verifyInventoryDigest(verifyInventory)
131            .cloud(OcflS3Client.builder()
132                .s3Client(s3Client)
133                .bucket(bucket)
134                .repoPrefix(prefix)
135                .build())
136                .build();
137
138        return createRepository(ocflWorkDir, builder -> {
139            builder.contentPathConstraints(ContentPathConstraints.cloud())
140                    .storage(storage);
141
142            if (withDb) {
143                builder.objectDetailsDb(db -> db.dataSource(dataSource));
144            }
145
146        }, algorithm, ocflUpgradeOnWrite);
147    }
148
149    private static MutableOcflRepository createRepository(final Path ocflWorkDir,
150                                                          final Consumer<OcflRepositoryBuilder> configurer,
151                                                          final org.fcrepo.config.DigestAlgorithm algorithm,
152                                                          final boolean ocflUpgradeOnWrite)
153            throws IOException {
154        createDirectories(ocflWorkDir);
155
156        final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm);
157        if (ocflDigestAlg == null) {
158            throw new UnsupportedDigestAlgorithmException(
159                    "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL");
160        }
161
162        final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ?
163                LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper();
164
165        final var builder = new OcflRepositoryBuilder()
166                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
167                .ocflConfig(config -> config.setDefaultDigestAlgorithm(ocflDigestAlg)
168                        .setOcflVersion(OCFL_VERSION)
169                        .setUpgradeObjectsOnWrite(ocflUpgradeOnWrite))
170                .logicalPathMapper(logicalPathMapper)
171                .workDir(ocflWorkDir);
172
173        configurer.accept(builder);
174
175        return builder.buildMutable();
176    }
177
178    /**
179     * @return new object mapper with default config
180     */
181    public static ObjectMapper objectMapper() {
182        return new ObjectMapper()
183                .configure(WRITE_DATES_AS_TIMESTAMPS, false)
184                .registerModule(new JavaTimeModule())
185                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
186    }
187
188    /**
189     * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm
190     *
191     * @param fcrepoAlg fedora digest algorithm
192     * @return OCFL client DigestAlgorithm, or null if no match could be made
193     */
194    public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) {
195        return fcrepoAlg.getAliases().stream()
196                .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias))
197                .filter(alg -> alg != null)
198                .findFirst()
199                .orElse(null);
200    }
201
202    private static Path createDirectories(final Path path) throws IOException {
203        try {
204            return Files.createDirectories(path);
205        } catch (final FileAlreadyExistsException e) {
206            // Ignore. This only happens with the path is a symlink
207            return path;
208        }
209    }
210}