001/* 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 */ 006package org.fcrepo.persistence.ocfl.impl; 007 008import static org.apache.jena.graph.NodeFactory.createURI; 009import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel; 010import static org.fcrepo.kernel.api.RdfLexicon.NON_RDF_SOURCE; 011import static org.fcrepo.persistence.ocfl.impl.OcflPersistentStorageUtils.getRdfFormat; 012import static org.slf4j.LoggerFactory.getLogger; 013 014import java.io.IOException; 015import java.io.InputStream; 016import java.util.ArrayList; 017import java.util.List; 018import java.util.Optional; 019import java.util.concurrent.Callable; 020import java.util.concurrent.atomic.AtomicReference; 021 022import javax.inject.Inject; 023import javax.validation.constraints.NotNull; 024 025import edu.wisc.library.ocfl.api.OcflRepository; 026import org.fcrepo.config.FedoraPropsConfig; 027import org.fcrepo.kernel.api.ContainmentIndex; 028import org.fcrepo.kernel.api.RdfLexicon; 029import org.fcrepo.kernel.api.RdfStream; 030import org.fcrepo.kernel.api.Transaction; 031import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 032import org.fcrepo.kernel.api.identifiers.FedoraId; 033import org.fcrepo.kernel.api.models.ResourceHeaders; 034import org.fcrepo.kernel.api.rdf.DefaultRdfStream; 035import org.fcrepo.kernel.api.services.MembershipService; 036import org.fcrepo.kernel.api.services.ReferenceService; 037import org.fcrepo.persistence.api.PersistentStorageSessionManager; 038import org.fcrepo.persistence.ocfl.api.FedoraToOcflObjectIndex; 039import org.fcrepo.search.api.Condition; 040import org.fcrepo.search.api.InvalidQueryException; 041import org.fcrepo.search.api.SearchIndex; 042import org.fcrepo.search.api.SearchParameters; 043import org.fcrepo.storage.ocfl.OcflObjectSessionFactory; 044import org.fcrepo.storage.ocfl.validation.ObjectValidator; 045 046import org.apache.jena.rdf.model.Model; 047import org.apache.jena.riot.RDFDataMgr; 048import org.slf4j.Logger; 049import org.springframework.beans.factory.annotation.Autowired; 050import org.springframework.beans.factory.annotation.Qualifier; 051import org.springframework.stereotype.Component; 052 053/** 054 * Service that does the reindexing for one OCFL object. 055 * @author whikloj 056 */ 057@Component 058public class ReindexService { 059 060 @Inject 061 private PersistentStorageSessionManager persistentStorageSessionManager; 062 063 @Inject 064 private OcflObjectSessionFactory ocflObjectSessionFactory; 065 066 @Autowired 067 @Qualifier("ocflIndex") 068 private FedoraToOcflObjectIndex ocflIndex; 069 070 @Autowired 071 private OcflRepository ocflRepository; 072 073 @Autowired 074 @Qualifier("containmentIndex") 075 private ContainmentIndex containmentIndex; 076 077 @Autowired 078 @Qualifier("searchIndex") 079 private SearchIndex searchIndex; 080 081 @Autowired 082 @Qualifier("referenceService") 083 private ReferenceService referenceService; 084 085 @Inject 086 private MembershipService membershipService; 087 088 @Inject 089 private ObjectValidator objectValidator; 090 091 @Inject 092 private FedoraPropsConfig config; 093 094 private static final Logger LOGGER = getLogger(ReindexService.class); 095 096 private int membershipPageSize = 500; 097 098 public void indexOcflObject(final Transaction tx, final String ocflId) { 099 LOGGER.debug("Indexing ocflId {} in transaction {}", ocflId, tx.getId()); 100 101 ocflRepository.invalidateCache(ocflId); 102 objectValidator.validate(ocflId, config.isRebuildFixityCheck()); 103 104 try (final var session = ocflObjectSessionFactory.newSession(ocflId)) { 105 final var rootId = new AtomicReference<FedoraId>(); 106 final var fedoraIds = new ArrayList<FedoraId>(); 107 final var headersList = new ArrayList<ResourceHeaders>(); 108 109 session.invalidateCache(ocflId); 110 session.streamResourceHeaders().forEach(storageHeaders -> { 111 final var headers = new ResourceHeadersAdapter(storageHeaders); 112 113 final var fedoraId = headers.getId(); 114 fedoraIds.add(fedoraId); 115 if (headers.isArchivalGroup() || headers.isObjectRoot()) { 116 rootId.set(fedoraId); 117 } 118 119 if (!fedoraId.isRepositoryRoot()) { 120 var parentId = headers.getParent(); 121 122 if (headers.getParent() == null) { 123 if (headers.isObjectRoot()) { 124 parentId = FedoraId.getRepositoryRootId(); 125 } else { 126 throw new IllegalStateException( 127 String.format("Resource %s must have a parent defined", fedoraId.getFullId())); 128 } 129 } 130 final var created = headers.getCreatedDate(); 131 if (!headers.isDeleted()) { 132 if (!headers.getInteractionModel().equals(NON_RDF_SOURCE.toString())) { 133 final Optional<InputStream> content = session.readContent(fedoraId.getFullId()) 134 .getContentStream(); 135 if (content.isPresent()) { 136 try (final var stream = content.get()) { 137 final RdfStream rdf = parseRdf(fedoraId, stream); 138 this.referenceService.updateReferences(tx, fedoraId, null, rdf); 139 } catch (final IOException e) { 140 LOGGER.warn("Content stream for {} closed prematurely, inbound references skipped.", 141 fedoraId.getFullId()); 142 throw new RepositoryRuntimeException(e.getMessage(), e); 143 } 144 } 145 } 146 147 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, null); 148 headersList.add(headers.asKernelHeaders()); 149 } else { 150 final var deleted = headers.getLastModifiedDate(); 151 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, deleted); 152 } 153 } 154 }); 155 156 if (rootId.get() == null) { 157 throw new IllegalStateException(String.format("Failed to find the root resource in object " + 158 "identified by %s. Please ensure that the object ID you are attempting to index " + 159 "refers to a corresponding valid Fedora-flavored object in the OCFL repository. Additionally " + 160 "be sure that the object ID corresponds with the object root resource (as opposed to child " + 161 "resources within the object).", ocflId)); 162 } 163 164 fedoraIds.forEach(fedoraIdentifier -> { 165 final var rootFedoraIdentifier = rootId.get(); 166 ocflIndex.addMapping(tx, fedoraIdentifier, rootFedoraIdentifier, ocflId); 167 LOGGER.debug("Rebuilt fedora-to-ocfl object index entry for {}", fedoraIdentifier); 168 }); 169 170 headersList.forEach(headers -> { 171 searchIndex.addUpdateIndex(tx, headers); 172 LOGGER.debug("Rebuilt searchIndex for {}", headers.getId()); 173 }); 174 } 175 } 176 177 /** 178 * Remove persistent sessions for a transaction to avoid memory leaks. 179 * @param transactionId the transaction id. 180 */ 181 public void cleanupSession(final String transactionId) { 182 persistentStorageSessionManager.removeSession(transactionId); 183 } 184 185 /** 186 * Set the membership page size. 187 * @param pageSize the new page size. 188 */ 189 public void setMembershipPageSize(final int pageSize) { 190 membershipPageSize = pageSize; 191 } 192 193 /** 194 * Reset all the indexes. 195 */ 196 public void reset() { 197 ocflIndex.reset(); 198 containmentIndex.reset(); 199 searchIndex.reset(); 200 referenceService.reset(); 201 membershipService.reset(); 202 } 203 204 /** 205 * Index all membership properties by querying for Direct containers, and then 206 * trying population of the membership index for each one 207 * @param transaction the transaction id. 208 */ 209 public void indexMembership(final Transaction transaction) { 210 LOGGER.debug("Starting indexMembership for transaction {}", transaction); 211 final var fields = List.of(Condition.Field.FEDORA_ID); 212 final var conditions = List.of(Condition.fromEnums(Condition.Field.RDF_TYPE, Condition.Operator.EQ, 213 RdfLexicon.DIRECT_CONTAINER.getURI())); 214 int offset = 0; 215 216 try { 217 int numResults; 218 do { 219 final var params = new SearchParameters(fields, conditions, membershipPageSize, 220 offset, Condition.Field.FEDORA_ID, "asc", false); 221 222 final var searchResult = searchIndex.doSearch(params); 223 final var resultList = searchResult.getItems(); 224 numResults = resultList.size(); 225 226 resultList.stream() 227 .map(entry -> FedoraId.create((String) entry.get(Condition.Field.FEDORA_ID.toString()))) 228 .forEach(containerId -> membershipService.populateMembershipHistory(transaction, containerId)); 229 230 // Results are paged, so step through pages until we reach the last one 231 offset += membershipPageSize; 232 } while (numResults == membershipPageSize); 233 234 } catch (final InvalidQueryException e) { 235 throw new RepositoryRuntimeException("Failed to repopulate membership history", e); 236 } 237 LOGGER.debug("Finished indexMembership for transaction {}", transaction); 238 } 239 240 /** 241 * Rollback changes in the transaction. 242 * @param tx the transaction 243 */ 244 public void rollbackMembership(@NotNull final Transaction tx) { 245 execQuietly("Failed to rollback membership index transaction " + tx.getId(), () -> { 246 membershipService.rollbackTransaction(tx); 247 return null; 248 }); 249 } 250 251 /** 252 * Executes the closure, capturing all exceptions, and logging them as errors. 253 * 254 * @param failureMessage what to print if the closure fails 255 * @param callable closure to execute 256 */ 257 private void execQuietly(final String failureMessage, final Callable<Void> callable) { 258 try { 259 callable.call(); 260 } catch (final Exception e) { 261 LOGGER.error(failureMessage, e); 262 } 263 } 264 265 /** 266 * Parse the inputstream from a Rdf resource to a RDFstream. 267 * 268 * @param fedoraIdentifier the resource identifier. 269 * @param inputStream the inputstream. 270 * @return an RdfStream of the resource triples. 271 */ 272 private static RdfStream parseRdf(final FedoraId fedoraIdentifier, final InputStream inputStream) { 273 final Model model = createDefaultModel(); 274 RDFDataMgr.read(model, inputStream, getRdfFormat().getLang()); 275 final FedoraId topic = (fedoraIdentifier.isDescription() ? fedoraIdentifier.asBaseId() : fedoraIdentifier); 276 return DefaultRdfStream.fromModel(createURI(topic.getFullId()), model); 277 } 278}