001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.connector.file;
019
020import static java.lang.System.currentTimeMillis;
021import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_DIGEST;
022import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_SIZE;
023import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_BINARY;
024import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_CONTAINER;
025import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION;
026import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_RESOURCE;
027import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1;
028import static org.fcrepo.kernel.api.utils.ContentDigest.asURI;
029import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_CREATED;
030import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_LASTMODIFIED;
031import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
032import static org.modeshape.jcr.api.JcrConstants.NT_FILE;
033import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER;
034import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE;
035
036import java.io.File;
037import java.net.URI;
038import java.util.Date;
039import java.util.HashMap;
040import java.io.IOException;
041import java.util.Map;
042
043import com.google.common.annotations.VisibleForTesting;
044
045import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
046import org.modeshape.connector.filesystem.FileSystemConnector;
047import org.modeshape.jcr.api.value.DateTime;
048import org.modeshape.jcr.api.nodetype.NodeTypeManager;
049import org.modeshape.jcr.spi.federation.DocumentChanges;
050import org.modeshape.jcr.spi.federation.DocumentReader;
051import org.modeshape.jcr.spi.federation.DocumentWriter;
052import org.modeshape.jcr.value.BinaryValue;
053import org.modeshape.jcr.value.Name;
054import org.modeshape.jcr.value.Property;
055import org.modeshape.jcr.value.basic.BasicSingleValueProperty;
056import org.modeshape.schematic.document.Document;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import javax.jcr.NamespaceRegistry;
061import javax.jcr.RepositoryException;
062
063/**
064 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and
065 * content properties.
066 *
067 * @author Andrew Woods
068 *         Date: 1/30/14
069 */
070public class FedoraFileSystemConnector extends FileSystemConnector {
071
072    private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class);
073
074    private static final String DELIMITER = "/";
075    private static final String JCR_CONTENT = "jcr:content";
076    private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT;
077
078    /**
079     * The string path for a {@link File} object that represents the top-level directory in which properties are
080     * stored.  This is optional for this connector, but if set allows properties to be cached (greatly
081     * improving performance) for even read-only connectors.  When this property is specified the extraPropertiesStore
082     * should be null (not specified) as it would be overridden by this.
083     */
084    private String propertiesDirectoryPath;
085    private File propertiesDirectory;
086
087    @Override
088    public void initialize(final NamespaceRegistry registry,
089                           final NodeTypeManager nodeTypeManager) throws IOException {
090        LOGGER.warn("FedoraFileSystemConnector will be removed from the core of Fedora in a coming release. "
091            + "See https://jira.duraspace.org/browse/FCREPO-2028 for more information.");
092        try {
093            super.initialize(registry, nodeTypeManager);
094        } catch (final RepositoryException e) {
095            throw new RepositoryRuntimeException("Error initializing FedoraFileSystemConnector!", e);
096        }
097
098        if (propertiesDirectoryPath != null) {
099           propertiesDirectory = new File(propertiesDirectoryPath);
100            if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) {
101                throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
102                        + ", does not exist or is not a directory.");
103            } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) {
104                throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
105                        + ", should be readable and writable.");
106            }
107            if (extraPropertiesStore() != null) {
108                LOGGER.warn("Extra properties store was specified but won't be used!");
109            }
110            setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory));
111        }
112    }
113
114    /**
115     * This method returns the object/document for the node with the federated arg 'id'.
116     *
117     * Additionally, this method adds Fedora datastream and content properties to the result of the parent class
118     * implementation.
119     */
120    @Override
121    public Document getDocumentById(final String id) {
122        LOGGER.debug("Getting Federated document: {}", id);
123        if (null == id || id.isEmpty()) {
124            LOGGER.warn("Can not get document with null id");
125            return null;
126        }
127
128        final Document doc = super.getDocumentById(id);
129        if ( doc == null ) {
130            LOGGER.debug("Non-existent node, document is null: {}", id);
131            return doc;
132        }
133
134        final DocumentReader docReader = readDocument(doc);
135        final DocumentWriter docWriter = writeDocument(doc);
136        final long lastmod = fileFor(id).lastModified();
137        LOGGER.debug("Adding lastModified={}", lastmod);
138        docWriter.addProperty(JCR_LASTMODIFIED, lastmod);
139
140        final String primaryType = docReader.getPrimaryTypeName();
141
142        if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) {
143            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id);
144            docWriter.addMixinType(FEDORA_RESOURCE);
145        }
146
147        // Is Fedora Datastream?
148        if (primaryType.equals(NT_FILE)) {
149            decorateDatastreamNode(docReader, docWriter);
150
151        // Is Fedora Content?
152        } else if (primaryType.equals(NT_RESOURCE)) {
153            decorateContentNode(docReader, docWriter, fileFor(id));
154
155        // Is Fedora Object?
156        } else if (primaryType.equals(NT_FOLDER)) {
157            decorateObjectNode(docReader, docWriter);
158        }
159
160        return docWriter.document();
161    }
162
163    /**
164     * Checks whether internally managed properties can and should be stored to
165     * an ExtraPropertiesStore.
166     * @return whether internally managed properties can and should be stored to
167     */
168    protected boolean shouldCacheProperties() {
169        return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null);
170    }
171
172
173    /**
174     * Pass-thru to the parent class in order to make this function public
175     *
176     * @param id the node ID to test
177     * @return whether the id corresponds to the root location
178     */
179    @Override
180    public boolean isRoot(final String id) {
181        return super.isRoot(id);
182    }
183
184    /**
185     * Pass-thru to the parent class in order to make this function public
186     *
187     * @param file the file used to compute a sha1 hash
188     * @return the sha1 hash of the file contents
189     */
190    @Override
191    public String sha1(final File file) {
192        final String cachedSha1 = getCachedSha1(file);
193        if (cachedSha1 == null) {
194            return computeAndCacheSha1(file);
195        }
196        return cachedSha1;
197    }
198
199    private String getCachedSha1(final File file) {
200        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
201        if (extraPropertiesStore() != null) {
202            final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id);
203            final Name digestName = nameFrom(CONTENT_DIGEST);
204            if (extraProperties.containsKey(digestName)) {
205                if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) {
206                    LOGGER.trace("Found sha1 for {} in extra properties store.", id);
207                    final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString();
208                    return uriStr.substring(uriStr.indexOf("sha1:") + 5);
209                }
210            }
211        } else {
212            LOGGER.trace("No cache configured to contain object hashes.");
213        }
214        return null;
215    }
216
217    private String computeAndCacheSha1(final File file) {
218        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
219        LOGGER.trace("Computing sha1 for {}.", id);
220        final String sha1 = super.sha1(file);
221        if (shouldCacheProperties()) {
222            final Map<Name, Property> updateMap = new HashMap<>();
223            final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST),
224                    asURI(SHA1.algorithm, sha1));
225            final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED),
226                    factories().getDateFactory().create(file.lastModified()));
227            updateMap.put(digestProperty.getName(), digestProperty);
228            updateMap.put(digestDateProperty.getName(), digestDateProperty);
229            extraPropertiesStore().updateProperties(id, updateMap);
230        }
231        return sha1;
232    }
233
234    private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) {
235        if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) {
236            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId());
237            docWriter.addMixinType(FEDORA_CONTAINER);
238        }
239    }
240
241    private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) {
242        if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) {
243            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId());
244            docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION);
245        }
246    }
247
248    private static void decorateContentNode(final DocumentReader docReader,
249                                            final DocumentWriter docWriter,
250                                            final File file) {
251        if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) {
252            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId());
253            docWriter.addMixinType(FEDORA_BINARY);
254        }
255
256        if (null == docReader.getProperty(CONTENT_DIGEST)
257                || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) {
258            final BinaryValue binaryValue = getBinaryValue(docReader);
259            final String dsChecksum = binaryValue.getHexHash();
260            final String dsURI = asURI(SHA1.algorithm, dsChecksum).toString();
261
262            LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId());
263            docWriter.addProperty(CONTENT_DIGEST, dsURI);
264        }
265
266        if (null == docReader.getProperty(CONTENT_SIZE)) {
267            final long binarySize = file.length();
268            LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId());
269            docWriter.addProperty(CONTENT_SIZE, binarySize);
270        }
271
272        LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId());
273    }
274
275    private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) {
276        if (lastModified == null) {
277            LOGGER.trace("Hash for {} has not been computed yet.", file.getName());
278            return true;
279        }
280        final DateTime datetime = (DateTime) lastModified.getFirstValue();
281        if (datetime.toDate().equals(new Date(file.lastModified()))) {
282            return false;
283        }
284        LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(),
285                new Date(file.lastModified()), datetime.toDate());
286        return true;
287    }
288
289    private static BinaryValue getBinaryValue(final DocumentReader docReader) {
290        final Property binaryProperty = docReader.getProperty(JCR_DATA);
291        return (BinaryValue) binaryProperty.getFirstValue();
292    }
293
294    /* Override write operations to also update the parent file's timestamp, so
295       its Last-Modified header correctly reflects changes to children. */
296    @Override
297    public boolean removeDocument( final String id ) {
298        if ( super.removeDocument(id) ) {
299            touchParent(id);
300            return true;
301        }
302        return false;
303    }
304
305    @Override
306    public void storeDocument( final Document document ) {
307        super.storeDocument( document );
308        touchParent(readDocument(document).getDocumentId());
309    }
310
311    @Override
312    public void updateDocument( final DocumentChanges changes ) {
313        super.updateDocument( changes );
314        touchParent( changes.getDocumentId() );
315    }
316
317    /**
318     * Find the parent file, and set its timestamp to the current time.  This
319     * timestamp will be used for populating the Last-Modified header.
320     * @param id the id
321    **/
322    protected void touchParent( final String id ) {
323        if (!isRoot(id)) {
324            final File file = fileFor(id);
325            final File parent = file.getParentFile();
326            parent.setLastModified(currentTimeMillis());
327        }
328    }
329
330    /* Overriding so unit test can mock. */
331    @Override
332    @VisibleForTesting
333    protected File fileFor( final String id ) {
334        return super.fileFor(id);
335    }
336    @Override
337    @VisibleForTesting
338    protected DocumentReader readDocument( final Document document ) {
339        return super.readDocument(document);
340    }
341
342    /* Overriding to make the FedoraFileSystemConnector is always read-only. */
343    @Override
344    public boolean isReadonly() {
345        return true;
346    }
347
348    @Override
349    public boolean isContentNode(final String id) {
350        return super.isContentNode(id);
351    }
352
353}