001/* 002 * The contents of this file are subject to the license and copyright 003 * detailed in the LICENSE and NOTICE files at the root of the source 004 * tree. 005 */ 006package org.fcrepo.persistence.ocfl.impl; 007 008import static org.apache.jena.graph.NodeFactory.createURI; 009import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel; 010import static org.fcrepo.kernel.api.RdfLexicon.NON_RDF_SOURCE; 011import static org.fcrepo.persistence.ocfl.impl.OcflPersistentStorageUtils.getRdfFormat; 012import static org.slf4j.LoggerFactory.getLogger; 013 014import java.io.IOException; 015import java.io.InputStream; 016import java.util.ArrayList; 017import java.util.List; 018import java.util.Optional; 019import java.util.concurrent.Callable; 020import java.util.concurrent.atomic.AtomicReference; 021 022import javax.inject.Inject; 023import javax.validation.constraints.NotNull; 024 025import io.ocfl.api.OcflRepository; 026import org.apache.jena.rdf.model.Resource; 027import org.fcrepo.config.FedoraPropsConfig; 028import org.fcrepo.kernel.api.ContainmentIndex; 029import org.fcrepo.kernel.api.RdfLexicon; 030import org.fcrepo.kernel.api.RdfStream; 031import org.fcrepo.kernel.api.Transaction; 032import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 033import org.fcrepo.kernel.api.identifiers.FedoraId; 034import org.fcrepo.kernel.api.models.ResourceHeaders; 035import org.fcrepo.kernel.api.rdf.DefaultRdfStream; 036import org.fcrepo.kernel.api.services.MembershipService; 037import org.fcrepo.kernel.api.services.ReferenceService; 038import org.fcrepo.persistence.api.PersistentStorageSessionManager; 039import org.fcrepo.persistence.api.exceptions.ObjectExistsInOcflIndexException; 040import org.fcrepo.persistence.ocfl.api.FedoraOcflMappingNotFoundException; 041import org.fcrepo.persistence.ocfl.api.FedoraToOcflObjectIndex; 042import org.fcrepo.search.api.Condition; 043import org.fcrepo.search.api.InvalidQueryException; 044import org.fcrepo.search.api.SearchIndex; 045import org.fcrepo.search.api.SearchParameters; 046import org.fcrepo.storage.ocfl.OcflObjectSessionFactory; 047import org.fcrepo.storage.ocfl.validation.ObjectValidator; 048 049import org.apache.jena.rdf.model.Model; 050import org.apache.jena.riot.RDFDataMgr; 051import org.slf4j.Logger; 052import org.springframework.beans.factory.annotation.Autowired; 053import org.springframework.beans.factory.annotation.Qualifier; 054import org.springframework.stereotype.Component; 055 056/** 057 * Service that does the reindexing for one OCFL object. 058 * @author whikloj 059 */ 060@Component 061public class ReindexService { 062 063 @Inject 064 private PersistentStorageSessionManager persistentStorageSessionManager; 065 066 @Inject 067 private OcflObjectSessionFactory ocflObjectSessionFactory; 068 069 @Autowired 070 @Qualifier("ocflIndex") 071 private FedoraToOcflObjectIndex ocflIndex; 072 073 @Autowired 074 private OcflRepository ocflRepository; 075 076 @Autowired 077 @Qualifier("containmentIndex") 078 private ContainmentIndex containmentIndex; 079 080 @Autowired 081 @Qualifier("searchIndex") 082 private SearchIndex searchIndex; 083 084 @Autowired 085 @Qualifier("referenceService") 086 private ReferenceService referenceService; 087 088 @Inject 089 private MembershipService membershipService; 090 091 @Inject 092 private ObjectValidator objectValidator; 093 094 @Inject 095 private FedoraPropsConfig config; 096 097 private static final Logger LOGGER = getLogger(ReindexService.class); 098 099 private int membershipPageSize = 500; 100 101 public void indexOcflObject(final Transaction tx, final String ocflId) { 102 LOGGER.debug("Indexing ocflId {} in transaction {}", ocflId, tx.getId()); 103 104 ocflRepository.invalidateCache(ocflId); 105 if (config.isRebuildValidation()) { 106 objectValidator.validate(ocflId, config.isRebuildFixityCheck()); 107 } 108 109 try (final var session = ocflObjectSessionFactory.newSession(ocflId)) { 110 final var rootId = new AtomicReference<FedoraId>(); 111 final var fedoraIds = new ArrayList<FedoraId>(); 112 final var headersList = new ArrayList<ResourceHeaders>(); 113 114 session.invalidateCache(ocflId); 115 session.streamResourceHeaders().forEach(storageHeaders -> { 116 final var headers = new ResourceHeadersAdapter(storageHeaders); 117 118 final var fedoraId = headers.getId(); 119 120 if (config.isRebuildContinue()) { 121 try { 122 ocflIndex.getMapping(tx, fedoraId); 123 // We got the mapping, so we can skip this resource. 124 throw new ObjectExistsInOcflIndexException( 125 String.format("Skipping indexing of %s in transaction %s, because" + 126 " it already exists in the index.", fedoraId, tx.getId()) 127 ); 128 } catch (FedoraOcflMappingNotFoundException e) { 129 LOGGER.debug("Indexing object {} in transaction {}, because it does not yet exist in the " + 130 "index.", fedoraId, tx.getId()); 131 } 132 } 133 134 fedoraIds.add(fedoraId); 135 if (headers.isArchivalGroup() || headers.isObjectRoot()) { 136 rootId.set(fedoraId); 137 } 138 139 if (!fedoraId.isRepositoryRoot()) { 140 var parentId = headers.getParent(); 141 142 if (headers.getParent() == null) { 143 if (headers.isObjectRoot()) { 144 parentId = FedoraId.getRepositoryRootId(); 145 } else { 146 throw new IllegalStateException( 147 String.format("Resource %s must have a parent defined", fedoraId.getFullId())); 148 } 149 } 150 final var created = headers.getCreatedDate(); 151 if (!headers.isDeleted()) { 152 if (!headers.getInteractionModel().equals(NON_RDF_SOURCE.toString())) { 153 final Optional<InputStream> content = session.readContent(fedoraId.getFullId()) 154 .getContentStream(); 155 if (content.isPresent()) { 156 try (final var stream = content.get()) { 157 final RdfStream rdf = parseRdf(fedoraId, stream); 158 this.referenceService.updateReferences(tx, fedoraId, null, rdf); 159 } catch (final IOException e) { 160 LOGGER.warn("Content stream for {} closed prematurely, inbound references skipped.", 161 fedoraId.getFullId()); 162 throw new RepositoryRuntimeException(e.getMessage(), e); 163 } 164 } 165 } 166 167 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, null); 168 headersList.add(headers.asKernelHeaders()); 169 } else { 170 final var deleted = headers.getLastModifiedDate(); 171 this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, deleted); 172 } 173 } 174 }); 175 176 if (rootId.get() == null) { 177 throw new IllegalStateException(String.format("Failed to find the root resource in object " + 178 "identified by %s. Please ensure that the object ID you are attempting to index " + 179 "refers to a corresponding valid Fedora-flavored object in the OCFL repository. Additionally " + 180 "be sure that the object ID corresponds with the object root resource (as opposed to child " + 181 "resources within the object).", ocflId)); 182 } 183 184 fedoraIds.forEach(fedoraIdentifier -> { 185 final var rootFedoraIdentifier = rootId.get(); 186 ocflIndex.addMapping(tx, fedoraIdentifier, rootFedoraIdentifier, ocflId); 187 LOGGER.debug("Rebuilt fedora-to-ocfl object index entry for {}", fedoraIdentifier); 188 }); 189 190 headersList.forEach(headers -> { 191 searchIndex.addUpdateIndex(tx, headers); 192 LOGGER.debug("Rebuilt searchIndex for {}", headers.getId()); 193 }); 194 } 195 } 196 197 /** 198 * Remove persistent sessions for a transaction to avoid memory leaks. 199 * @param transactionId the transaction id. 200 */ 201 public void cleanupSession(final String transactionId) { 202 persistentStorageSessionManager.removeSession(transactionId); 203 } 204 205 /** 206 * Set the membership page size. 207 * @param pageSize the new page size. 208 */ 209 public void setMembershipPageSize(final int pageSize) { 210 membershipPageSize = pageSize; 211 } 212 213 /** 214 * Reset all the indexes. 215 */ 216 public void reset() { 217 ocflIndex.reset(); 218 containmentIndex.reset(); 219 searchIndex.reset(); 220 referenceService.reset(); 221 membershipService.reset(); 222 } 223 224 /** 225 * Index all membership properties by querying for Direct and Indirect containers, and then 226 * trying population of the membership index for each one 227 * @param transaction the transaction id. 228 */ 229 public void indexMembership(final Transaction transaction) { 230 indexContainerType(transaction, RdfLexicon.DIRECT_CONTAINER); 231 indexContainerType(transaction, RdfLexicon.INDIRECT_CONTAINER); 232 } 233 234 private void indexContainerType(final Transaction transaction, final Resource containerType) { 235 LOGGER.debug("Starting indexMembership for transaction {}", transaction); 236 final var fields = List.of(Condition.Field.FEDORA_ID); 237 final var conditions = List.of(Condition.fromEnums(Condition.Field.RDF_TYPE, Condition.Operator.EQ, 238 containerType.getURI())); 239 int offset = 0; 240 241 try { 242 int numResults; 243 do { 244 final var params = new SearchParameters(fields, conditions, membershipPageSize, 245 offset, Condition.Field.FEDORA_ID, "asc", false); 246 247 final var searchResult = searchIndex.doSearch(params); 248 final var resultList = searchResult.getItems(); 249 numResults = resultList.size(); 250 251 resultList.stream() 252 .map(entry -> FedoraId.create((String) entry.get(Condition.Field.FEDORA_ID.toString()))) 253 .forEach(containerId -> membershipService.populateMembershipHistory(transaction, containerId)); 254 255 // Results are paged, so step through pages until we reach the last one 256 offset += membershipPageSize; 257 } while (numResults == membershipPageSize); 258 259 } catch (final InvalidQueryException e) { 260 throw new RepositoryRuntimeException("Failed to repopulate membership history", e); 261 } 262 LOGGER.debug("Finished indexMembership for transaction {}", transaction); 263 } 264 265 /** 266 * Rollback changes in the transaction. 267 * @param tx the transaction 268 */ 269 public void rollbackMembership(@NotNull final Transaction tx) { 270 execQuietly("Failed to rollback membership index transaction " + tx.getId(), () -> { 271 membershipService.rollbackTransaction(tx); 272 return null; 273 }); 274 } 275 276 /** 277 * Executes the closure, capturing all exceptions, and logging them as errors. 278 * 279 * @param failureMessage what to print if the closure fails 280 * @param callable closure to execute 281 */ 282 private void execQuietly(final String failureMessage, final Callable<Void> callable) { 283 try { 284 callable.call(); 285 } catch (final Exception e) { 286 LOGGER.error(failureMessage, e); 287 } 288 } 289 290 /** 291 * Parse the inputstream from a Rdf resource to a RDFstream. 292 * 293 * @param fedoraIdentifier the resource identifier. 294 * @param inputStream the inputstream. 295 * @return an RdfStream of the resource triples. 296 */ 297 private static RdfStream parseRdf(final FedoraId fedoraIdentifier, final InputStream inputStream) { 298 final Model model = createDefaultModel(); 299 RDFDataMgr.read(model, inputStream, getRdfFormat().getLang()); 300 final FedoraId topic = (fedoraIdentifier.isDescription() ? fedoraIdentifier.asBaseId() : fedoraIdentifier); 301 return DefaultRdfStream.fromModel(createURI(topic.getFullId()), model); 302 } 303}