001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.persistence.ocfl.impl;
007
008import static org.apache.jena.graph.NodeFactory.createURI;
009import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel;
010import static org.fcrepo.kernel.api.RdfLexicon.NON_RDF_SOURCE;
011import static org.fcrepo.persistence.ocfl.impl.OcflPersistentStorageUtils.getRdfFormat;
012import static org.slf4j.LoggerFactory.getLogger;
013
014import java.io.IOException;
015import java.io.InputStream;
016import java.util.ArrayList;
017import java.util.List;
018import java.util.Optional;
019import java.util.concurrent.Callable;
020import java.util.concurrent.atomic.AtomicReference;
021
022import javax.inject.Inject;
023import javax.validation.constraints.NotNull;
024
025import io.ocfl.api.OcflRepository;
026import org.apache.jena.rdf.model.Resource;
027import org.fcrepo.config.FedoraPropsConfig;
028import org.fcrepo.kernel.api.ContainmentIndex;
029import org.fcrepo.kernel.api.RdfLexicon;
030import org.fcrepo.kernel.api.RdfStream;
031import org.fcrepo.kernel.api.Transaction;
032import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
033import org.fcrepo.kernel.api.identifiers.FedoraId;
034import org.fcrepo.kernel.api.models.ResourceHeaders;
035import org.fcrepo.kernel.api.rdf.DefaultRdfStream;
036import org.fcrepo.kernel.api.services.MembershipService;
037import org.fcrepo.kernel.api.services.ReferenceService;
038import org.fcrepo.persistence.api.PersistentStorageSessionManager;
039import org.fcrepo.persistence.api.exceptions.ObjectExistsInOcflIndexException;
040import org.fcrepo.persistence.ocfl.api.FedoraOcflMappingNotFoundException;
041import org.fcrepo.persistence.ocfl.api.FedoraToOcflObjectIndex;
042import org.fcrepo.search.api.Condition;
043import org.fcrepo.search.api.InvalidQueryException;
044import org.fcrepo.search.api.SearchIndex;
045import org.fcrepo.search.api.SearchParameters;
046import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
047import org.fcrepo.storage.ocfl.validation.ObjectValidator;
048
049import org.apache.jena.rdf.model.Model;
050import org.apache.jena.riot.RDFDataMgr;
051import org.slf4j.Logger;
052import org.springframework.beans.factory.annotation.Autowired;
053import org.springframework.beans.factory.annotation.Qualifier;
054import org.springframework.stereotype.Component;
055
056/**
057 * Service that does the reindexing for one OCFL object.
058 * @author whikloj
059 */
060@Component
061public class ReindexService {
062
063    @Inject
064    private PersistentStorageSessionManager persistentStorageSessionManager;
065
066    @Inject
067    private OcflObjectSessionFactory ocflObjectSessionFactory;
068
069    @Autowired
070    @Qualifier("ocflIndex")
071    private FedoraToOcflObjectIndex ocflIndex;
072
073    @Autowired
074    private OcflRepository ocflRepository;
075
076    @Autowired
077    @Qualifier("containmentIndex")
078    private ContainmentIndex containmentIndex;
079
080    @Autowired
081    @Qualifier("searchIndex")
082    private SearchIndex searchIndex;
083
084    @Autowired
085    @Qualifier("referenceService")
086    private ReferenceService referenceService;
087
088    @Inject
089    private MembershipService membershipService;
090
091    @Inject
092    private ObjectValidator objectValidator;
093
094    @Inject
095    private FedoraPropsConfig config;
096
097    private static final Logger LOGGER = getLogger(ReindexService.class);
098
099    private int membershipPageSize = 500;
100
101    public void indexOcflObject(final Transaction tx, final String ocflId) {
102        LOGGER.debug("Indexing ocflId {} in transaction {}", ocflId, tx.getId());
103
104        ocflRepository.invalidateCache(ocflId);
105        if (config.isRebuildValidation()) {
106            objectValidator.validate(ocflId, config.isRebuildFixityCheck());
107        }
108
109        try (final var session = ocflObjectSessionFactory.newSession(ocflId)) {
110            final var rootId = new AtomicReference<FedoraId>();
111            final var fedoraIds = new ArrayList<FedoraId>();
112            final var headersList = new ArrayList<ResourceHeaders>();
113
114            session.invalidateCache(ocflId);
115            session.streamResourceHeaders().forEach(storageHeaders -> {
116                final var headers = new ResourceHeadersAdapter(storageHeaders);
117
118                final var fedoraId = headers.getId();
119
120                if (config.isRebuildContinue()) {
121                    try {
122                        ocflIndex.getMapping(tx, fedoraId);
123                        // We got the mapping, so we can skip this resource.
124                        throw new ObjectExistsInOcflIndexException(
125                                String.format("Skipping indexing of %s in transaction %s, because" +
126                                " it already exists in the index.", fedoraId, tx.getId())
127                        );
128                    } catch (FedoraOcflMappingNotFoundException e) {
129                        LOGGER.debug("Indexing object {} in transaction {}, because it does not yet exist in the " +
130                                "index.", fedoraId, tx.getId());
131                    }
132                }
133
134                fedoraIds.add(fedoraId);
135                if (headers.isArchivalGroup() || headers.isObjectRoot()) {
136                    rootId.set(fedoraId);
137                }
138
139                if (!fedoraId.isRepositoryRoot()) {
140                    var parentId = headers.getParent();
141
142                    if (headers.getParent() == null) {
143                        if (headers.isObjectRoot()) {
144                            parentId = FedoraId.getRepositoryRootId();
145                        } else {
146                            throw new IllegalStateException(
147                                    String.format("Resource %s must have a parent defined", fedoraId.getFullId()));
148                        }
149                    }
150                    final var created = headers.getCreatedDate();
151                    if (!headers.isDeleted()) {
152                        if (!headers.getInteractionModel().equals(NON_RDF_SOURCE.toString())) {
153                            final Optional<InputStream> content = session.readContent(fedoraId.getFullId())
154                                    .getContentStream();
155                            if (content.isPresent()) {
156                                try (final var stream = content.get()) {
157                                    final RdfStream rdf = parseRdf(fedoraId, stream);
158                                    this.referenceService.updateReferences(tx, fedoraId, null, rdf);
159                                } catch (final IOException e) {
160                                    LOGGER.warn("Content stream for {} closed prematurely, inbound references skipped.",
161                                            fedoraId.getFullId());
162                                    throw new RepositoryRuntimeException(e.getMessage(), e);
163                                }
164                            }
165                        }
166
167                        this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, null);
168                        headersList.add(headers.asKernelHeaders());
169                    } else {
170                        final var deleted = headers.getLastModifiedDate();
171                        this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, deleted);
172                    }
173                }
174            });
175
176            if (rootId.get() == null) {
177                throw new IllegalStateException(String.format("Failed to find the root resource in object " +
178                        "identified by %s. Please ensure that the object ID you are attempting to index " +
179                        "refers to a corresponding valid Fedora-flavored object in the OCFL repository. Additionally " +
180                        "be sure that the object ID corresponds with the object root resource (as opposed to child " +
181                        "resources within the object).", ocflId));
182            }
183
184            fedoraIds.forEach(fedoraIdentifier -> {
185                final var rootFedoraIdentifier = rootId.get();
186                ocflIndex.addMapping(tx, fedoraIdentifier, rootFedoraIdentifier, ocflId);
187                LOGGER.debug("Rebuilt fedora-to-ocfl object index entry for {}", fedoraIdentifier);
188            });
189
190            headersList.forEach(headers -> {
191                searchIndex.addUpdateIndex(tx, headers);
192                LOGGER.debug("Rebuilt searchIndex for {}", headers.getId());
193            });
194        }
195    }
196
197    /**
198     * Remove persistent sessions for a transaction to avoid memory leaks.
199     * @param transactionId the transaction id.
200     */
201    public void cleanupSession(final String transactionId) {
202        persistentStorageSessionManager.removeSession(transactionId);
203    }
204
205    /**
206     * Set the membership page size.
207     * @param pageSize the new page size.
208     */
209    public void setMembershipPageSize(final int pageSize) {
210        membershipPageSize = pageSize;
211    }
212
213    /**
214     * Reset all the indexes.
215     */
216    public void reset() {
217        ocflIndex.reset();
218        containmentIndex.reset();
219        searchIndex.reset();
220        referenceService.reset();
221        membershipService.reset();
222    }
223
224    /**
225     * Index all membership properties by querying for Direct and Indirect containers, and then
226     * trying population of the membership index for each one
227     * @param transaction the transaction id.
228     */
229    public void indexMembership(final Transaction transaction) {
230        indexContainerType(transaction, RdfLexicon.DIRECT_CONTAINER);
231        indexContainerType(transaction, RdfLexicon.INDIRECT_CONTAINER);
232    }
233
234    private void indexContainerType(final Transaction transaction, final Resource containerType) {
235        LOGGER.debug("Starting indexMembership for transaction {}", transaction);
236        final var fields = List.of(Condition.Field.FEDORA_ID);
237        final var conditions = List.of(Condition.fromEnums(Condition.Field.RDF_TYPE, Condition.Operator.EQ,
238                containerType.getURI()));
239        int offset = 0;
240
241        try {
242            int numResults;
243            do {
244                final var params = new SearchParameters(fields, conditions, membershipPageSize,
245                        offset, Condition.Field.FEDORA_ID, "asc", false);
246
247                final var searchResult = searchIndex.doSearch(params);
248                final var resultList = searchResult.getItems();
249                numResults = resultList.size();
250
251                resultList.stream()
252                        .map(entry -> FedoraId.create((String) entry.get(Condition.Field.FEDORA_ID.toString())))
253                        .forEach(containerId -> membershipService.populateMembershipHistory(transaction, containerId));
254
255                // Results are paged, so step through pages until we reach the last one
256                offset += membershipPageSize;
257            } while (numResults == membershipPageSize);
258
259        } catch (final InvalidQueryException e) {
260            throw new RepositoryRuntimeException("Failed to repopulate membership history", e);
261        }
262        LOGGER.debug("Finished indexMembership for transaction {}", transaction);
263    }
264
265    /**
266     * Rollback changes in the transaction.
267     * @param tx the transaction
268     */
269    public void rollbackMembership(@NotNull final Transaction tx) {
270        execQuietly("Failed to rollback membership index transaction " + tx.getId(), () -> {
271            membershipService.rollbackTransaction(tx);
272            return null;
273        });
274    }
275
276    /**
277     * Executes the closure, capturing all exceptions, and logging them as errors.
278     *
279     * @param failureMessage what to print if the closure fails
280     * @param callable closure to execute
281     */
282    private void execQuietly(final String failureMessage, final Callable<Void> callable) {
283        try {
284            callable.call();
285        } catch (final Exception e) {
286            LOGGER.error(failureMessage, e);
287        }
288    }
289
290    /**
291     * Parse the inputstream from a Rdf resource to a RDFstream.
292     *
293     * @param fedoraIdentifier the resource identifier.
294     * @param inputStream the inputstream.
295     * @return an RdfStream of the resource triples.
296     */
297    private static RdfStream parseRdf(final FedoraId fedoraIdentifier, final InputStream inputStream) {
298        final Model model = createDefaultModel();
299        RDFDataMgr.read(model, inputStream, getRdfFormat().getLang());
300        final FedoraId topic = (fedoraIdentifier.isDescription() ? fedoraIdentifier.asBaseId() : fedoraIdentifier);
301        return DefaultRdfStream.fromModel(createURI(topic.getFullId()), model);
302    }
303}