001/* 002 * Licensed to DuraSpace under one or more contributor license agreements. 003 * See the NOTICE file distributed with this work for additional information 004 * regarding copyright ownership. 005 * 006 * DuraSpace licenses this file to you under the Apache License, 007 * Version 2.0 (the "License"); you may not use this file except in 008 * compliance with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.fcrepo.connector.file; 019 020import static java.lang.System.currentTimeMillis; 021import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_DIGEST; 022import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_SIZE; 023import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_BINARY; 024import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_CONTAINER; 025import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION; 026import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_RESOURCE; 027import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1; 028import static org.fcrepo.kernel.api.utils.ContentDigest.asURI; 029import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_CREATED; 030import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_LASTMODIFIED; 031import static org.modeshape.jcr.api.JcrConstants.JCR_DATA; 032import static org.modeshape.jcr.api.JcrConstants.NT_FILE; 033import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER; 034import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE; 035 036import java.io.File; 037import java.net.URI; 038import java.util.Date; 039import java.util.HashMap; 040import java.io.IOException; 041import java.util.Map; 042 043import com.google.common.annotations.VisibleForTesting; 044 045import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 046import org.modeshape.connector.filesystem.FileSystemConnector; 047import org.modeshape.jcr.api.value.DateTime; 048import org.modeshape.jcr.api.nodetype.NodeTypeManager; 049import org.modeshape.jcr.spi.federation.DocumentChanges; 050import org.modeshape.jcr.spi.federation.DocumentReader; 051import org.modeshape.jcr.spi.federation.DocumentWriter; 052import org.modeshape.jcr.value.BinaryValue; 053import org.modeshape.jcr.value.Name; 054import org.modeshape.jcr.value.Property; 055import org.modeshape.jcr.value.basic.BasicSingleValueProperty; 056import org.modeshape.schematic.document.Document; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060import javax.jcr.NamespaceRegistry; 061import javax.jcr.RepositoryException; 062 063/** 064 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and 065 * content properties. 066 * 067 * @author Andrew Woods 068 * Date: 1/30/14 069 */ 070public class FedoraFileSystemConnector extends FileSystemConnector { 071 072 private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class); 073 074 private static final String DELIMITER = "/"; 075 private static final String JCR_CONTENT = "jcr:content"; 076 private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT; 077 078 /** 079 * The string path for a {@link File} object that represents the top-level directory in which properties are 080 * stored. This is optional for this connector, but if set allows properties to be cached (greatly 081 * improving performance) for even read-only connectors. When this property is specified the extraPropertiesStore 082 * should be null (not specified) as it would be overridden by this. 083 */ 084 private String propertiesDirectoryPath; 085 private File propertiesDirectory; 086 087 @Override 088 public void initialize(final NamespaceRegistry registry, 089 final NodeTypeManager nodeTypeManager) throws IOException { 090 LOGGER.warn("FedoraFileSystemConnector will be removed from the core of Fedora in a coming release. " 091 + "See https://jira.duraspace.org/browse/FCREPO-2028 for more information."); 092 try { 093 super.initialize(registry, nodeTypeManager); 094 } catch (final RepositoryException e) { 095 throw new RepositoryRuntimeException("Error initializing FedoraFileSystemConnector!", e); 096 } 097 098 if (propertiesDirectoryPath != null) { 099 propertiesDirectory = new File(propertiesDirectoryPath); 100 if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) { 101 throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 102 + ", does not exist or is not a directory."); 103 } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) { 104 throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 105 + ", should be readable and writable."); 106 } 107 if (extraPropertiesStore() != null) { 108 LOGGER.warn("Extra properties store was specified but won't be used!"); 109 } 110 setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory)); 111 } 112 } 113 114 /** 115 * This method returns the object/document for the node with the federated arg 'id'. 116 * 117 * Additionally, this method adds Fedora datastream and content properties to the result of the parent class 118 * implementation. 119 */ 120 @Override 121 public Document getDocumentById(final String id) { 122 LOGGER.debug("Getting Federated document: {}", id); 123 if (null == id || id.isEmpty()) { 124 LOGGER.warn("Can not get document with null id"); 125 return null; 126 } 127 128 final Document doc = super.getDocumentById(id); 129 if ( doc == null ) { 130 LOGGER.debug("Non-existent node, document is null: {}", id); 131 return doc; 132 } 133 134 final DocumentReader docReader = readDocument(doc); 135 final DocumentWriter docWriter = writeDocument(doc); 136 final long lastmod = fileFor(id).lastModified(); 137 LOGGER.debug("Adding lastModified={}", lastmod); 138 docWriter.addProperty(JCR_LASTMODIFIED, lastmod); 139 140 final String primaryType = docReader.getPrimaryTypeName(); 141 142 if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) { 143 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id); 144 docWriter.addMixinType(FEDORA_RESOURCE); 145 } 146 147 // Is Fedora Datastream? 148 if (primaryType.equals(NT_FILE)) { 149 decorateDatastreamNode(docReader, docWriter); 150 151 // Is Fedora Content? 152 } else if (primaryType.equals(NT_RESOURCE)) { 153 decorateContentNode(docReader, docWriter, fileFor(id)); 154 155 // Is Fedora Object? 156 } else if (primaryType.equals(NT_FOLDER)) { 157 decorateObjectNode(docReader, docWriter); 158 } 159 160 return docWriter.document(); 161 } 162 163 /** 164 * Checks whether internally managed properties can and should be stored to 165 * an ExtraPropertiesStore. 166 * @return whether internally managed properties can and should be stored to 167 */ 168 protected boolean shouldCacheProperties() { 169 return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null); 170 } 171 172 173 /** 174 * Pass-thru to the parent class in order to make this function public 175 * 176 * @param id the node ID to test 177 * @return whether the id corresponds to the root location 178 */ 179 @Override 180 public boolean isRoot(final String id) { 181 return super.isRoot(id); 182 } 183 184 /** 185 * Pass-thru to the parent class in order to make this function public 186 * 187 * @param file the file used to compute a sha1 hash 188 * @return the sha1 hash of the file contents 189 */ 190 @Override 191 public String sha1(final File file) { 192 final String cachedSha1 = getCachedSha1(file); 193 if (cachedSha1 == null) { 194 return computeAndCacheSha1(file); 195 } 196 return cachedSha1; 197 } 198 199 private String getCachedSha1(final File file) { 200 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 201 if (extraPropertiesStore() != null) { 202 final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id); 203 final Name digestName = nameFrom(CONTENT_DIGEST); 204 if (extraProperties.containsKey(digestName)) { 205 if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) { 206 LOGGER.trace("Found sha1 for {} in extra properties store.", id); 207 final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString(); 208 return uriStr.substring(uriStr.indexOf("sha1:") + 5); 209 } 210 } 211 } else { 212 LOGGER.trace("No cache configured to contain object hashes."); 213 } 214 return null; 215 } 216 217 private String computeAndCacheSha1(final File file) { 218 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 219 LOGGER.trace("Computing sha1 for {}.", id); 220 final String sha1 = super.sha1(file); 221 if (shouldCacheProperties()) { 222 final Map<Name, Property> updateMap = new HashMap<>(); 223 final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST), 224 asURI(SHA1.algorithm, sha1)); 225 final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED), 226 factories().getDateFactory().create(file.lastModified())); 227 updateMap.put(digestProperty.getName(), digestProperty); 228 updateMap.put(digestDateProperty.getName(), digestDateProperty); 229 extraPropertiesStore().updateProperties(id, updateMap); 230 } 231 return sha1; 232 } 233 234 private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) { 235 if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) { 236 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId()); 237 docWriter.addMixinType(FEDORA_CONTAINER); 238 } 239 } 240 241 private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) { 242 if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) { 243 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId()); 244 docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION); 245 } 246 } 247 248 private static void decorateContentNode(final DocumentReader docReader, 249 final DocumentWriter docWriter, 250 final File file) { 251 if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) { 252 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId()); 253 docWriter.addMixinType(FEDORA_BINARY); 254 } 255 256 if (null == docReader.getProperty(CONTENT_DIGEST) 257 || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) { 258 final BinaryValue binaryValue = getBinaryValue(docReader); 259 final String dsChecksum = binaryValue.getHexHash(); 260 final String dsURI = asURI(SHA1.algorithm, dsChecksum).toString(); 261 262 LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId()); 263 docWriter.addProperty(CONTENT_DIGEST, dsURI); 264 } 265 266 if (null == docReader.getProperty(CONTENT_SIZE)) { 267 final long binarySize = file.length(); 268 LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId()); 269 docWriter.addProperty(CONTENT_SIZE, binarySize); 270 } 271 272 LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId()); 273 } 274 275 private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) { 276 if (lastModified == null) { 277 LOGGER.trace("Hash for {} has not been computed yet.", file.getName()); 278 return true; 279 } 280 final DateTime datetime = (DateTime) lastModified.getFirstValue(); 281 if (datetime.toDate().equals(new Date(file.lastModified()))) { 282 return false; 283 } 284 LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(), 285 new Date(file.lastModified()), datetime.toDate()); 286 return true; 287 } 288 289 private static BinaryValue getBinaryValue(final DocumentReader docReader) { 290 final Property binaryProperty = docReader.getProperty(JCR_DATA); 291 return (BinaryValue) binaryProperty.getFirstValue(); 292 } 293 294 /* Override write operations to also update the parent file's timestamp, so 295 its Last-Modified header correctly reflects changes to children. */ 296 @Override 297 public boolean removeDocument( final String id ) { 298 if ( super.removeDocument(id) ) { 299 touchParent(id); 300 return true; 301 } 302 return false; 303 } 304 305 @Override 306 public void storeDocument( final Document document ) { 307 super.storeDocument( document ); 308 touchParent(readDocument(document).getDocumentId()); 309 } 310 311 @Override 312 public void updateDocument( final DocumentChanges changes ) { 313 super.updateDocument( changes ); 314 touchParent( changes.getDocumentId() ); 315 } 316 317 /** 318 * Find the parent file, and set its timestamp to the current time. This 319 * timestamp will be used for populating the Last-Modified header. 320 * @param id the id 321 **/ 322 protected void touchParent( final String id ) { 323 if (!isRoot(id)) { 324 final File file = fileFor(id); 325 final File parent = file.getParentFile(); 326 parent.setLastModified(currentTimeMillis()); 327 } 328 } 329 330 /* Overriding so unit test can mock. */ 331 @Override 332 @VisibleForTesting 333 protected File fileFor( final String id ) { 334 return super.fileFor(id); 335 } 336 @Override 337 @VisibleForTesting 338 protected DocumentReader readDocument( final Document document ) { 339 return super.readDocument(document); 340 } 341 342 /* Overriding to make the FedoraFileSystemConnector is always read-only. */ 343 @Override 344 public boolean isReadonly() { 345 return true; 346 } 347 348 @Override 349 public boolean isContentNode(final String id) { 350 return super.isContentNode(id); 351 } 352 353}