001/* 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 */ 006package org.fcrepo.persistence.ocfl.impl; 007 008import static org.apache.jena.graph.NodeFactory.createURI; 009import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel; 010import static org.fcrepo.kernel.api.RdfLexicon.NON_RDF_SOURCE; 011import static org.fcrepo.persistence.ocfl.impl.OcflPersistentStorageUtils.getRdfFormat; 012import static org.slf4j.LoggerFactory.getLogger; 013 014import java.io.IOException; 015import java.io.InputStream; 016import java.util.ArrayList; 017import java.util.List; 018import java.util.Optional; 019import java.util.concurrent.Callable; 020import java.util.concurrent.atomic.AtomicReference; 021 022import javax.inject.Inject; 023import javax.validation.constraints.NotNull; 024 025import edu.wisc.library.ocfl.api.OcflRepository; 026import org.fcrepo.config.FedoraPropsConfig; 027import org.fcrepo.kernel.api.ContainmentIndex; 028import org.fcrepo.kernel.api.RdfLexicon; 029import org.fcrepo.kernel.api.RdfStream; 030import org.fcrepo.kernel.api.Transaction; 031import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 032import org.fcrepo.kernel.api.identifiers.FedoraId; 033import org.fcrepo.kernel.api.models.ResourceHeaders; 034import org.fcrepo.kernel.api.rdf.DefaultRdfStream; 035import org.fcrepo.kernel.api.services.MembershipService; 036import org.fcrepo.kernel.api.services.ReferenceService; 037import org.fcrepo.persistence.api.PersistentStorageSessionManager; 038import org.fcrepo.persistence.api.exceptions.ObjectExistsInOcflIndexException; 039import org.fcrepo.persistence.ocfl.api.FedoraOcflMappingNotFoundException; 040import org.fcrepo.persistence.ocfl.api.FedoraToOcflObjectIndex; 041import org.fcrepo.search.api.Condition; 042import org.fcrepo.search.api.InvalidQueryException; 043import org.fcrepo.search.api.SearchIndex; 044import org.fcrepo.search.api.SearchParameters; 045import org.fcrepo.storage.ocfl.OcflObjectSessionFactory; 046import org.fcrepo.storage.ocfl.validation.ObjectValidator; 047 048import org.apache.jena.rdf.model.Model; 049import org.apache.jena.riot.RDFDataMgr; 050import org.slf4j.Logger; 051import org.springframework.beans.factory.annotation.Autowired; 052import org.springframework.beans.factory.annotation.Qualifier; 053import org.springframework.stereotype.Component; 054 055/** 056 * Service that does the reindexing for one OCFL object. 057 * @author whikloj 058 */ 059@Component 060public class ReindexService { 061 062 @Inject 063 private PersistentStorageSessionManager persistentStorageSessionManager; 064 065 @Inject 066 private OcflObjectSessionFactory ocflObjectSessionFactory; 067 068 @Autowired 069 @Qualifier("ocflIndex") 070 private FedoraToOcflObjectIndex ocflIndex; 071 072 @Autowired 073 private OcflRepository ocflRepository; 074 075 @Autowired 076 @Qualifier("containmentIndex") 077 private ContainmentIndex containmentIndex; 078 079 @Autowired 080 @Qualifier("searchIndex") 081 private SearchIndex searchIndex; 082 083 @Autowired 084 @Qualifier("referenceService") 085 private ReferenceService referenceService; 086 087 @Inject 088 private MembershipService membershipService; 089 090 @Inject 091 private ObjectValidator objectValidator; 092 093 @Inject 094 private FedoraPropsConfig config; 095 096 private static final Logger LOGGER = getLogger(ReindexService.class); 097 098 private int membershipPageSize = 500; 099 100 public void indexOcflObject(final Transaction tx, final String ocflId) { 101 LOGGER.debug("Indexing ocflId {} in transaction {}", ocflId, tx.getId()); 102 103 ocflRepository.invalidateCache(ocflId); 104 if (config.isRebuildValidation()) { 105 objectValidator.validate(ocflId, config.isRebuildFixityCheck()); 106 } 107 108 try (final var session = ocflObjectSessionFactory.newSession(ocflId)) { 109 final var rootId = new AtomicReference<FedoraId>(); 110 final var fedoraIds = new ArrayList<FedoraId>(); 111 final var headersList = new ArrayList<ResourceHeaders>(); 112 113 session.invalidateCache(ocflId); 114 session.streamResourceHeaders().forEach(storageHeaders -> { 115 final var headers = new ResourceHeadersAdapter(storageHeaders); 116 117 final var fedoraId = headers.getId(); 118 119 if (config.isRebuildContinue()) { 120 try { 121 ocflIndex.getMapping(tx, fedoraId); 122 // We got the mapping, so we can skip this resource. 123 throw new ObjectExistsInOcflIndexException( 124 String.format("Skipping indexing of %s in transaction %s, because" + 125 " it already exists in the index.", fedoraId, tx.getId()) 126 ); 127 } catch (FedoraOcflMappingNotFoundException e) { 128 LOGGER.debug("Indexing object {} in transaction {}, because it does not yet exist in the " + 129 "index.", fedoraId, tx.getId()); 130 } 131 } 132 133 fedoraIds.add(fedoraId); 134 if (headers.isArchivalGroup() || headers.isObjectRoot()) { 135 rootId.set(fedoraId); 136 } 137 138 if (!fedoraId.isRepositoryRoot()) { 139 var parentId = headers.getParent(); 140 141 if (headers.getParent() == null) { 142 if (headers.isObjectRoot()) { 143 parentId = FedoraId.getRepositoryRootId(); 144 } else { 145 throw new IllegalStateException( 146 String.format("Resource %s must have a parent defined", fedoraId.getFullId())); 147 } 148 } 149 final var created = headers.getCreatedDate(); 150 if (!headers.isDeleted()) { 151 if (!headers.getInteractionModel().equals(NON_RDF_SOURCE.toString())) { 152 final Optional<InputStream> content = session.readContent(fedoraId.getFullId()) 153 .getContentStream(); 154 if (content.isPresent()) { 155 try (final var stream = content.get()) { 156 final RdfStream rdf = parseRdf(fedoraId, stream); 157 this.referenceService.updateReferences(tx, fedoraId, null, rdf); 158 } catch (final IOException e) { 159 LOGGER.warn("Content stream for {} closed prematurely, inbound references skipped.", 160 fedoraId.getFullId()); 161 throw new RepositoryRuntimeException(e.getMessage(), e); 162 } 163 } 164 } 165 166 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, null); 167 headersList.add(headers.asKernelHeaders()); 168 } else { 169 final var deleted = headers.getLastModifiedDate(); 170 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, deleted); 171 } 172 } 173 }); 174 175 if (rootId.get() == null) { 176 throw new IllegalStateException(String.format("Failed to find the root resource in object " + 177 "identified by %s. Please ensure that the object ID you are attempting to index " + 178 "refers to a corresponding valid Fedora-flavored object in the OCFL repository. Additionally " + 179 "be sure that the object ID corresponds with the object root resource (as opposed to child " + 180 "resources within the object).", ocflId)); 181 } 182 183 fedoraIds.forEach(fedoraIdentifier -> { 184 final var rootFedoraIdentifier = rootId.get(); 185 ocflIndex.addMapping(tx, fedoraIdentifier, rootFedoraIdentifier, ocflId); 186 LOGGER.debug("Rebuilt fedora-to-ocfl object index entry for {}", fedoraIdentifier); 187 }); 188 189 headersList.forEach(headers -> { 190 searchIndex.addUpdateIndex(tx, headers); 191 LOGGER.debug("Rebuilt searchIndex for {}", headers.getId()); 192 }); 193 } 194 } 195 196 /** 197 * Remove persistent sessions for a transaction to avoid memory leaks. 198 * @param transactionId the transaction id. 199 */ 200 public void cleanupSession(final String transactionId) { 201 persistentStorageSessionManager.removeSession(transactionId); 202 } 203 204 /** 205 * Set the membership page size. 206 * @param pageSize the new page size. 207 */ 208 public void setMembershipPageSize(final int pageSize) { 209 membershipPageSize = pageSize; 210 } 211 212 /** 213 * Reset all the indexes. 214 */ 215 public void reset() { 216 ocflIndex.reset(); 217 containmentIndex.reset(); 218 searchIndex.reset(); 219 referenceService.reset(); 220 membershipService.reset(); 221 } 222 223 /** 224 * Index all membership properties by querying for Direct containers, and then 225 * trying population of the membership index for each one 226 * @param transaction the transaction id. 227 */ 228 public void indexMembership(final Transaction transaction) { 229 LOGGER.debug("Starting indexMembership for transaction {}", transaction); 230 final var fields = List.of(Condition.Field.FEDORA_ID); 231 final var conditions = List.of(Condition.fromEnums(Condition.Field.RDF_TYPE, Condition.Operator.EQ, 232 RdfLexicon.DIRECT_CONTAINER.getURI())); 233 int offset = 0; 234 235 try { 236 int numResults; 237 do { 238 final var params = new SearchParameters(fields, conditions, membershipPageSize, 239 offset, Condition.Field.FEDORA_ID, "asc", false); 240 241 final var searchResult = searchIndex.doSearch(params); 242 final var resultList = searchResult.getItems(); 243 numResults = resultList.size(); 244 245 resultList.stream() 246 .map(entry -> FedoraId.create((String) entry.get(Condition.Field.FEDORA_ID.toString()))) 247 .forEach(containerId -> membershipService.populateMembershipHistory(transaction, containerId)); 248 249 // Results are paged, so step through pages until we reach the last one 250 offset += membershipPageSize; 251 } while (numResults == membershipPageSize); 252 253 } catch (final InvalidQueryException e) { 254 throw new RepositoryRuntimeException("Failed to repopulate membership history", e); 255 } 256 LOGGER.debug("Finished indexMembership for transaction {}", transaction); 257 } 258 259 /** 260 * Rollback changes in the transaction. 261 * @param tx the transaction 262 */ 263 public void rollbackMembership(@NotNull final Transaction tx) { 264 execQuietly("Failed to rollback membership index transaction " + tx.getId(), () -> { 265 membershipService.rollbackTransaction(tx); 266 return null; 267 }); 268 } 269 270 /** 271 * Executes the closure, capturing all exceptions, and logging them as errors. 272 * 273 * @param failureMessage what to print if the closure fails 274 * @param callable closure to execute 275 */ 276 private void execQuietly(final String failureMessage, final Callable<Void> callable) { 277 try { 278 callable.call(); 279 } catch (final Exception e) { 280 LOGGER.error(failureMessage, e); 281 } 282 } 283 284 /** 285 * Parse the inputstream from a Rdf resource to a RDFstream. 286 * 287 * @param fedoraIdentifier the resource identifier. 288 * @param inputStream the inputstream. 289 * @return an RdfStream of the resource triples. 290 */ 291 private static RdfStream parseRdf(final FedoraId fedoraIdentifier, final InputStream inputStream) { 292 final Model model = createDefaultModel(); 293 RDFDataMgr.read(model, inputStream, getRdfFormat().getLang()); 294 final FedoraId topic = (fedoraIdentifier.isDescription() ? fedoraIdentifier.asBaseId() : fedoraIdentifier); 295 return DefaultRdfStream.fromModel(createURI(topic.getFullId()), model); 296 } 297}