001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.persistence.ocfl.impl;
007
008import static com.fasterxml.jackson.databind.SerializationFeature.WRITE_DATES_AS_TIMESTAMPS;
009import static org.apache.jena.riot.RDFFormat.NTRIPLES;
010
011import java.io.IOException;
012import java.nio.file.FileAlreadyExistsException;
013import java.nio.file.Files;
014import java.nio.file.Path;
015import java.util.function.Consumer;
016
017import javax.sql.DataSource;
018
019import com.fasterxml.jackson.annotation.JsonInclude;
020import com.fasterxml.jackson.databind.ObjectMapper;
021import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
022import edu.wisc.library.ocfl.api.DigestAlgorithmRegistry;
023import edu.wisc.library.ocfl.api.MutableOcflRepository;
024import edu.wisc.library.ocfl.api.OcflConfig;
025import edu.wisc.library.ocfl.api.model.DigestAlgorithm;
026import edu.wisc.library.ocfl.aws.OcflS3Client;
027import edu.wisc.library.ocfl.core.OcflRepositoryBuilder;
028import edu.wisc.library.ocfl.core.extension.storage.layout.config.HashedNTupleLayoutConfig;
029import edu.wisc.library.ocfl.core.path.constraint.ContentPathConstraints;
030import edu.wisc.library.ocfl.core.path.mapper.LogicalPathMappers;
031import edu.wisc.library.ocfl.core.storage.OcflStorageBuilder;
032import org.apache.commons.lang3.SystemUtils;
033import org.apache.http.impl.auth.UnsupportedDigestAlgorithmException;
034import org.apache.jena.riot.RDFFormat;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037import software.amazon.awssdk.services.s3.S3Client;
038
039/**
040 * A set of utility functions for supporting OCFL persistence activities.
041 *
042 * @author dbernstein
043 * @since 6.0.0
044 */
045public class OcflPersistentStorageUtils {
046
047    private static final Logger log = LoggerFactory.getLogger(OcflPersistentStorageUtils.class);
048
049    private OcflPersistentStorageUtils() {
050    }
051
052    /**
053     * The default RDF on disk format
054     * TODO Make this value configurable
055     */
056
057    private static RDFFormat DEFAULT_RDF_FORMAT = NTRIPLES;
058
059    /**
060     * @return the RDF Format. By default NTRIPLES are returned.
061     */
062    public static RDFFormat getRdfFormat() {
063        return DEFAULT_RDF_FORMAT;
064    }
065
066    /**
067     * @return the RDF file extension.
068     */
069    public static String getRDFFileExtension() {
070        return "." + DEFAULT_RDF_FORMAT.getLang().getFileExtensions().get(0);
071    }
072
073    /**
074     * Create a new ocfl repository backed by the filesystem
075     * @param ocflStorageRootDir The ocfl storage root directory
076     * @param ocflWorkDir The ocfl work directory
077     * @param algorithm the algorithm for the OCFL repository
078     * @return the repository
079     */
080    public static MutableOcflRepository createFilesystemRepository(final Path ocflStorageRootDir,
081                                                                   final Path ocflWorkDir,
082                                                                   final org.fcrepo.config.DigestAlgorithm algorithm)
083            throws IOException {
084        createDirectories(ocflStorageRootDir);
085
086        final var storage = OcflStorageBuilder.builder().fileSystem(ocflStorageRootDir).build();
087
088        return createRepository(ocflWorkDir, builder -> {
089            builder.storage(storage);
090        }, algorithm);
091    }
092
093    /**
094     * Create a new ocfl repository backed by s3
095     *
096     * @param dataSource the datasource to keep inventories in and use as a lock
097     * @param s3Client aws s3 client
098     * @param bucket the bucket to store objects in
099     * @param prefix the prefix within the bucket to store objects under
100     * @param ocflWorkDir the local directory to stage objects in
101     * @param algorithm the algorithm for the OCFL repository
102     * @param withDb true if the ocfl client should use a db
103     * @return the repository
104     */
105    public static MutableOcflRepository createS3Repository(final DataSource dataSource,
106                                                           final S3Client s3Client,
107                                                           final String bucket,
108                                                           final String prefix,
109                                                           final Path ocflWorkDir,
110                                                           final org.fcrepo.config.DigestAlgorithm algorithm,
111                                                           final boolean withDb)
112            throws IOException {
113        createDirectories(ocflWorkDir);
114
115        final var storage = OcflStorageBuilder.builder().cloud(OcflS3Client.builder()
116                .s3Client(s3Client)
117                .bucket(bucket)
118                .repoPrefix(prefix)
119                .build())
120                .build();
121
122        return createRepository(ocflWorkDir, builder -> {
123            builder.contentPathConstraints(ContentPathConstraints.cloud())
124                    .storage(storage);
125
126            if (withDb) {
127                builder.objectDetailsDb(db -> db.dataSource(dataSource));
128            }
129
130        }, algorithm);
131    }
132
133    private static MutableOcflRepository createRepository(final Path ocflWorkDir,
134                                                          final Consumer<OcflRepositoryBuilder> configurer,
135                                                          final org.fcrepo.config.DigestAlgorithm algorithm)
136            throws IOException {
137        createDirectories(ocflWorkDir);
138
139        final DigestAlgorithm ocflDigestAlg = translateFedoraDigestToOcfl(algorithm);
140        if (ocflDigestAlg == null) {
141            throw new UnsupportedDigestAlgorithmException(
142                    "Unable to map Fedora default digest algorithm " + algorithm + " into OCFL");
143        }
144
145        final var logicalPathMapper = SystemUtils.IS_OS_WINDOWS ?
146                LogicalPathMappers.percentEncodingWindowsMapper() : LogicalPathMappers.percentEncodingLinuxMapper();
147
148        final var builder = new OcflRepositoryBuilder()
149                .defaultLayoutConfig(new HashedNTupleLayoutConfig())
150                .ocflConfig(new OcflConfig().setDefaultDigestAlgorithm(ocflDigestAlg))
151                .logicalPathMapper(logicalPathMapper)
152                .workDir(ocflWorkDir);
153
154        configurer.accept(builder);
155
156        return builder.buildMutable();
157    }
158
159    /**
160     * @return new object mapper with default config
161     */
162    public static ObjectMapper objectMapper() {
163        return new ObjectMapper()
164                .configure(WRITE_DATES_AS_TIMESTAMPS, false)
165                .registerModule(new JavaTimeModule())
166                .setSerializationInclusion(JsonInclude.Include.NON_NULL);
167    }
168
169    /**
170     * Translates the provided fedora digest algorithm enum into a OCFL client digest algorithm
171     *
172     * @param fcrepoAlg fedora digest algorithm
173     * @return OCFL client DigestAlgorithm, or null if no match could be made
174     */
175    public static DigestAlgorithm translateFedoraDigestToOcfl(final org.fcrepo.config.DigestAlgorithm fcrepoAlg) {
176        return fcrepoAlg.getAliases().stream()
177                .map(alias -> DigestAlgorithmRegistry.getAlgorithm(alias))
178                .filter(alg -> alg != null)
179                .findFirst()
180                .orElse(null);
181    }
182
183    private static Path createDirectories(final Path path) throws IOException {
184        try {
185            return Files.createDirectories(path);
186        } catch (final FileAlreadyExistsException e) {
187            // Ignore. This only happens with the path is a symlink
188            return path;
189        }
190    }
191}