001/* 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 */ 006package org.fcrepo.persistence.ocfl.impl; 007 008import static org.apache.jena.graph.NodeFactory.createURI; 009import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel; 010import static org.fcrepo.kernel.api.RdfLexicon.NON_RDF_SOURCE; 011import static org.fcrepo.persistence.ocfl.impl.OcflPersistentStorageUtils.getRdfFormat; 012import static org.slf4j.LoggerFactory.getLogger; 013 014import java.io.IOException; 015import java.io.InputStream; 016import java.util.ArrayList; 017import java.util.List; 018import java.util.Optional; 019import java.util.concurrent.Callable; 020import java.util.concurrent.atomic.AtomicReference; 021 022import javax.inject.Inject; 023import javax.validation.constraints.NotNull; 024 025import org.fcrepo.config.FedoraPropsConfig; 026import org.fcrepo.kernel.api.ContainmentIndex; 027import org.fcrepo.kernel.api.RdfLexicon; 028import org.fcrepo.kernel.api.RdfStream; 029import org.fcrepo.kernel.api.Transaction; 030import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 031import org.fcrepo.kernel.api.identifiers.FedoraId; 032import org.fcrepo.kernel.api.models.ResourceHeaders; 033import org.fcrepo.kernel.api.rdf.DefaultRdfStream; 034import org.fcrepo.kernel.api.services.MembershipService; 035import org.fcrepo.kernel.api.services.ReferenceService; 036import org.fcrepo.persistence.api.PersistentStorageSessionManager; 037import org.fcrepo.persistence.ocfl.api.FedoraToOcflObjectIndex; 038import org.fcrepo.search.api.Condition; 039import org.fcrepo.search.api.InvalidQueryException; 040import org.fcrepo.search.api.SearchIndex; 041import org.fcrepo.search.api.SearchParameters; 042import org.fcrepo.storage.ocfl.OcflObjectSessionFactory; 043import org.fcrepo.storage.ocfl.validation.ObjectValidator; 044 045import org.apache.jena.rdf.model.Model; 046import org.apache.jena.riot.RDFDataMgr; 047import org.slf4j.Logger; 048import org.springframework.beans.factory.annotation.Autowired; 049import org.springframework.beans.factory.annotation.Qualifier; 050import org.springframework.stereotype.Component; 051 052/** 053 * Service that does the reindexing for one OCFL object. 054 * @author whikloj 055 */ 056@Component 057public class ReindexService { 058 059 @Inject 060 private PersistentStorageSessionManager persistentStorageSessionManager; 061 062 @Inject 063 private OcflObjectSessionFactory ocflObjectSessionFactory; 064 065 @Autowired 066 @Qualifier("ocflIndex") 067 private FedoraToOcflObjectIndex ocflIndex; 068 069 @Autowired 070 @Qualifier("containmentIndex") 071 private ContainmentIndex containmentIndex; 072 073 @Autowired 074 @Qualifier("searchIndex") 075 private SearchIndex searchIndex; 076 077 @Autowired 078 @Qualifier("referenceService") 079 private ReferenceService referenceService; 080 081 @Inject 082 private MembershipService membershipService; 083 084 @Inject 085 private ObjectValidator objectValidator; 086 087 @Inject 088 private FedoraPropsConfig config; 089 090 private static final Logger LOGGER = getLogger(ReindexService.class); 091 092 private int membershipPageSize = 500; 093 094 public void indexOcflObject(final Transaction tx, final String ocflId) { 095 LOGGER.debug("Indexing ocflId {} in transaction {}", ocflId, tx.getId()); 096 097 objectValidator.validate(ocflId, config.isRebuildFixityCheck()); 098 099 try (final var session = ocflObjectSessionFactory.newSession(ocflId)) { 100 final var rootId = new AtomicReference<FedoraId>(); 101 final var fedoraIds = new ArrayList<FedoraId>(); 102 final var headersList = new ArrayList<ResourceHeaders>(); 103 104 session.streamResourceHeaders().forEach(storageHeaders -> { 105 final var headers = new ResourceHeadersAdapter(storageHeaders); 106 107 final var fedoraId = headers.getId(); 108 fedoraIds.add(fedoraId); 109 if (headers.isArchivalGroup() || headers.isObjectRoot()) { 110 rootId.set(fedoraId); 111 } 112 113 if (!fedoraId.isRepositoryRoot()) { 114 var parentId = headers.getParent(); 115 116 if (headers.getParent() == null) { 117 if (headers.isObjectRoot()) { 118 parentId = FedoraId.getRepositoryRootId(); 119 } else { 120 throw new IllegalStateException( 121 String.format("Resource %s must have a parent defined", fedoraId.getFullId())); 122 } 123 } 124 final var created = headers.getCreatedDate(); 125 if (!headers.isDeleted()) { 126 if (!headers.getInteractionModel().equals(NON_RDF_SOURCE.toString())) { 127 final Optional<InputStream> content = session.readContent(fedoraId.getFullId()) 128 .getContentStream(); 129 if (content.isPresent()) { 130 try (final var stream = content.get()) { 131 final RdfStream rdf = parseRdf(fedoraId, stream); 132 this.referenceService.updateReferences(tx, fedoraId, null, rdf); 133 } catch (final IOException e) { 134 LOGGER.warn("Content stream for {} closed prematurely, inbound references skipped.", 135 fedoraId.getFullId()); 136 throw new RepositoryRuntimeException(e.getMessage(), e); 137 } 138 } 139 } 140 141 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, null); 142 headersList.add(headers.asKernelHeaders()); 143 } else { 144 final var deleted = headers.getLastModifiedDate(); 145 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, deleted); 146 } 147 } 148 }); 149 150 if (rootId.get() == null) { 151 throw new IllegalStateException(String.format("Failed to find the root resource in object " + 152 "identified by %s. Please ensure that the object ID you are attempting to index " + 153 "refers to a corresponding valid Fedora-flavored object in the OCFL repository. Additionally " + 154 "be sure that the object ID corresponds with the object root resource (as opposed to child " + 155 "resources within the object).", ocflId)); 156 } 157 158 fedoraIds.forEach(fedoraIdentifier -> { 159 final var rootFedoraIdentifier = rootId.get(); 160 ocflIndex.addMapping(tx, fedoraIdentifier, rootFedoraIdentifier, ocflId); 161 LOGGER.debug("Rebuilt fedora-to-ocfl object index entry for {}", fedoraIdentifier); 162 }); 163 164 headersList.forEach(headers -> { 165 searchIndex.addUpdateIndex(tx, headers); 166 LOGGER.debug("Rebuilt searchIndex for {}", headers.getId()); 167 }); 168 } 169 } 170 171 /** 172 * Remove persistent sessions for a transaction to avoid memory leaks. 173 * @param transactionId the transaction id. 174 */ 175 public void cleanupSession(final String transactionId) { 176 persistentStorageSessionManager.removeSession(transactionId); 177 } 178 179 /** 180 * Set the membership page size. 181 * @param pageSize the new page size. 182 */ 183 public void setMembershipPageSize(final int pageSize) { 184 membershipPageSize = pageSize; 185 } 186 187 /** 188 * Reset all the indexes. 189 */ 190 public void reset() { 191 ocflIndex.reset(); 192 containmentIndex.reset(); 193 searchIndex.reset(); 194 referenceService.reset(); 195 membershipService.reset(); 196 } 197 198 /** 199 * Index all membership properties by querying for Direct containers, and then 200 * trying population of the membership index for each one 201 * @param transaction the transaction id. 202 */ 203 public void indexMembership(final Transaction transaction) { 204 LOGGER.debug("Starting indexMembership for transaction {}", transaction); 205 final var fields = List.of(Condition.Field.FEDORA_ID); 206 final var conditions = List.of(Condition.fromEnums(Condition.Field.RDF_TYPE, Condition.Operator.EQ, 207 RdfLexicon.DIRECT_CONTAINER.getURI())); 208 int offset = 0; 209 210 try { 211 int numResults; 212 do { 213 final var params = new SearchParameters(fields, conditions, membershipPageSize, 214 offset, Condition.Field.FEDORA_ID, "asc", false); 215 216 final var searchResult = searchIndex.doSearch(params); 217 final var resultList = searchResult.getItems(); 218 numResults = resultList.size(); 219 220 resultList.stream() 221 .map(entry -> FedoraId.create((String) entry.get(Condition.Field.FEDORA_ID.toString()))) 222 .forEach(containerId -> membershipService.populateMembershipHistory(transaction, containerId)); 223 224 // Results are paged, so step through pages until we reach the last one 225 offset += membershipPageSize; 226 } while (numResults == membershipPageSize); 227 228 } catch (final InvalidQueryException e) { 229 throw new RepositoryRuntimeException("Failed to repopulate membership history", e); 230 } 231 LOGGER.debug("Finished indexMembership for transaction {}", transaction); 232 } 233 234 /** 235 * Rollback changes in the transaction. 236 * @param tx the transaction 237 */ 238 public void rollbackMembership(@NotNull final Transaction tx) { 239 execQuietly("Failed to rollback membership index transaction " + tx.getId(), () -> { 240 membershipService.rollbackTransaction(tx); 241 return null; 242 }); 243 } 244 245 /** 246 * Executes the closure, capturing all exceptions, and logging them as errors. 247 * 248 * @param failureMessage what to print if the closure fails 249 * @param callable closure to execute 250 */ 251 private void execQuietly(final String failureMessage, final Callable<Void> callable) { 252 try { 253 callable.call(); 254 } catch (final Exception e) { 255 LOGGER.error(failureMessage, e); 256 } 257 } 258 259 /** 260 * Parse the inputstream from a Rdf resource to a RDFstream. 261 * 262 * @param fedoraIdentifier the resource identifier. 263 * @param inputStream the inputstream. 264 * @return an RdfStream of the resource triples. 265 */ 266 private static RdfStream parseRdf(final FedoraId fedoraIdentifier, final InputStream inputStream) { 267 final Model model = createDefaultModel(); 268 RDFDataMgr.read(model, inputStream, getRdfFormat().getLang()); 269 final FedoraId topic = (fedoraIdentifier.isDescription() ? fedoraIdentifier.asBaseId() : fedoraIdentifier); 270 return DefaultRdfStream.fromModel(createURI(topic.getFullId()), model); 271 } 272}