001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.persistence.ocfl.impl;
007
008import static org.apache.jena.graph.NodeFactory.createURI;
009import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel;
010import static org.fcrepo.kernel.api.RdfLexicon.NON_RDF_SOURCE;
011import static org.fcrepo.persistence.ocfl.impl.OcflPersistentStorageUtils.getRdfFormat;
012import static org.slf4j.LoggerFactory.getLogger;
013
014import java.io.IOException;
015import java.io.InputStream;
016import java.util.ArrayList;
017import java.util.List;
018import java.util.Optional;
019import java.util.concurrent.Callable;
020import java.util.concurrent.atomic.AtomicReference;
021
022import javax.inject.Inject;
023import javax.validation.constraints.NotNull;
024
025import edu.wisc.library.ocfl.api.OcflRepository;
026import org.fcrepo.config.FedoraPropsConfig;
027import org.fcrepo.kernel.api.ContainmentIndex;
028import org.fcrepo.kernel.api.RdfLexicon;
029import org.fcrepo.kernel.api.RdfStream;
030import org.fcrepo.kernel.api.Transaction;
031import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
032import org.fcrepo.kernel.api.identifiers.FedoraId;
033import org.fcrepo.kernel.api.models.ResourceHeaders;
034import org.fcrepo.kernel.api.rdf.DefaultRdfStream;
035import org.fcrepo.kernel.api.services.MembershipService;
036import org.fcrepo.kernel.api.services.ReferenceService;
037import org.fcrepo.persistence.api.PersistentStorageSessionManager;
038import org.fcrepo.persistence.ocfl.api.FedoraToOcflObjectIndex;
039import org.fcrepo.search.api.Condition;
040import org.fcrepo.search.api.InvalidQueryException;
041import org.fcrepo.search.api.SearchIndex;
042import org.fcrepo.search.api.SearchParameters;
043import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
044import org.fcrepo.storage.ocfl.validation.ObjectValidator;
045
046import org.apache.jena.rdf.model.Model;
047import org.apache.jena.riot.RDFDataMgr;
048import org.slf4j.Logger;
049import org.springframework.beans.factory.annotation.Autowired;
050import org.springframework.beans.factory.annotation.Qualifier;
051import org.springframework.stereotype.Component;
052
053/**
054 * Service that does the reindexing for one OCFL object.
055 * @author whikloj
056 */
057@Component
058public class ReindexService {
059
060    @Inject
061    private PersistentStorageSessionManager persistentStorageSessionManager;
062
063    @Inject
064    private OcflObjectSessionFactory ocflObjectSessionFactory;
065
066    @Autowired
067    @Qualifier("ocflIndex")
068    private FedoraToOcflObjectIndex ocflIndex;
069
070    @Autowired
071    private OcflRepository ocflRepository;
072
073    @Autowired
074    @Qualifier("containmentIndex")
075    private ContainmentIndex containmentIndex;
076
077    @Autowired
078    @Qualifier("searchIndex")
079    private SearchIndex searchIndex;
080
081    @Autowired
082    @Qualifier("referenceService")
083    private ReferenceService referenceService;
084
085    @Inject
086    private MembershipService membershipService;
087
088    @Inject
089    private ObjectValidator objectValidator;
090
091    @Inject
092    private FedoraPropsConfig config;
093
094    private static final Logger LOGGER = getLogger(ReindexService.class);
095
096    private int membershipPageSize = 500;
097
098    public void indexOcflObject(final Transaction tx, final String ocflId) {
099        LOGGER.debug("Indexing ocflId {} in transaction {}", ocflId, tx.getId());
100
101        ocflRepository.invalidateCache(ocflId);
102        if (config.isRebuildValidation()) {
103            objectValidator.validate(ocflId, config.isRebuildFixityCheck());
104        }
105
106        try (final var session = ocflObjectSessionFactory.newSession(ocflId)) {
107            final var rootId = new AtomicReference<FedoraId>();
108            final var fedoraIds = new ArrayList<FedoraId>();
109            final var headersList = new ArrayList<ResourceHeaders>();
110
111            session.invalidateCache(ocflId);
112            session.streamResourceHeaders().forEach(storageHeaders -> {
113                final var headers = new ResourceHeadersAdapter(storageHeaders);
114
115                final var fedoraId = headers.getId();
116                fedoraIds.add(fedoraId);
117                if (headers.isArchivalGroup() || headers.isObjectRoot()) {
118                    rootId.set(fedoraId);
119                }
120
121                if (!fedoraId.isRepositoryRoot()) {
122                    var parentId = headers.getParent();
123
124                    if (headers.getParent() == null) {
125                        if (headers.isObjectRoot()) {
126                            parentId = FedoraId.getRepositoryRootId();
127                        } else {
128                            throw new IllegalStateException(
129                                    String.format("Resource %s must have a parent defined", fedoraId.getFullId()));
130                        }
131                    }
132                    final var created = headers.getCreatedDate();
133                    if (!headers.isDeleted()) {
134                        if (!headers.getInteractionModel().equals(NON_RDF_SOURCE.toString())) {
135                            final Optional<InputStream> content = session.readContent(fedoraId.getFullId())
136                                    .getContentStream();
137                            if (content.isPresent()) {
138                                try (final var stream = content.get()) {
139                                    final RdfStream rdf = parseRdf(fedoraId, stream);
140                                    this.referenceService.updateReferences(tx, fedoraId, null, rdf);
141                                } catch (final IOException e) {
142                                    LOGGER.warn("Content stream for {} closed prematurely, inbound references skipped.",
143                                            fedoraId.getFullId());
144                                    throw new RepositoryRuntimeException(e.getMessage(), e);
145                                }
146                            }
147                        }
148
149                        this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, null);
150                        headersList.add(headers.asKernelHeaders());
151                    } else {
152                        final var deleted = headers.getLastModifiedDate();
153                        this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, deleted);
154                    }
155                }
156            });
157
158            if (rootId.get() == null) {
159                throw new IllegalStateException(String.format("Failed to find the root resource in object " +
160                        "identified by %s. Please ensure that the object ID you are attempting to index " +
161                        "refers to a corresponding valid Fedora-flavored object in the OCFL repository. Additionally " +
162                        "be sure that the object ID corresponds with the object root resource (as opposed to child " +
163                        "resources within the object).", ocflId));
164            }
165
166            fedoraIds.forEach(fedoraIdentifier -> {
167                final var rootFedoraIdentifier = rootId.get();
168                ocflIndex.addMapping(tx, fedoraIdentifier, rootFedoraIdentifier, ocflId);
169                LOGGER.debug("Rebuilt fedora-to-ocfl object index entry for {}", fedoraIdentifier);
170            });
171
172            headersList.forEach(headers -> {
173                searchIndex.addUpdateIndex(tx, headers);
174                LOGGER.debug("Rebuilt searchIndex for {}", headers.getId());
175            });
176        }
177    }
178
179    /**
180     * Remove persistent sessions for a transaction to avoid memory leaks.
181     * @param transactionId the transaction id.
182     */
183    public void cleanupSession(final String transactionId) {
184        persistentStorageSessionManager.removeSession(transactionId);
185    }
186
187    /**
188     * Set the membership page size.
189     * @param pageSize the new page size.
190     */
191    public void setMembershipPageSize(final int pageSize) {
192        membershipPageSize = pageSize;
193    }
194
195    /**
196     * Reset all the indexes.
197     */
198    public void reset() {
199        ocflIndex.reset();
200        containmentIndex.reset();
201        searchIndex.reset();
202        referenceService.reset();
203        membershipService.reset();
204    }
205
206    /**
207     * Index all membership properties by querying for Direct containers, and then
208     * trying population of the membership index for each one
209     * @param transaction the transaction id.
210     */
211    public void indexMembership(final Transaction transaction) {
212        LOGGER.debug("Starting indexMembership for transaction {}", transaction);
213        final var fields = List.of(Condition.Field.FEDORA_ID);
214        final var conditions = List.of(Condition.fromEnums(Condition.Field.RDF_TYPE, Condition.Operator.EQ,
215                RdfLexicon.DIRECT_CONTAINER.getURI()));
216        int offset = 0;
217
218        try {
219            int numResults;
220            do {
221                final var params = new SearchParameters(fields, conditions, membershipPageSize,
222                        offset, Condition.Field.FEDORA_ID, "asc", false);
223
224                final var searchResult = searchIndex.doSearch(params);
225                final var resultList = searchResult.getItems();
226                numResults = resultList.size();
227
228                resultList.stream()
229                        .map(entry -> FedoraId.create((String) entry.get(Condition.Field.FEDORA_ID.toString())))
230                        .forEach(containerId -> membershipService.populateMembershipHistory(transaction, containerId));
231
232                // Results are paged, so step through pages until we reach the last one
233                offset += membershipPageSize;
234            } while (numResults == membershipPageSize);
235
236        } catch (final InvalidQueryException e) {
237            throw new RepositoryRuntimeException("Failed to repopulate membership history", e);
238        }
239        LOGGER.debug("Finished indexMembership for transaction {}", transaction);
240    }
241
242    /**
243     * Rollback changes in the transaction.
244     * @param tx the transaction
245     */
246    public void rollbackMembership(@NotNull final Transaction tx) {
247        execQuietly("Failed to rollback membership index transaction " + tx.getId(), () -> {
248            membershipService.rollbackTransaction(tx);
249            return null;
250        });
251    }
252
253    /**
254     * Executes the closure, capturing all exceptions, and logging them as errors.
255     *
256     * @param failureMessage what to print if the closure fails
257     * @param callable closure to execute
258     */
259    private void execQuietly(final String failureMessage, final Callable<Void> callable) {
260        try {
261            callable.call();
262        } catch (final Exception e) {
263            LOGGER.error(failureMessage, e);
264        }
265    }
266
267    /**
268     * Parse the inputstream from a Rdf resource to a RDFstream.
269     *
270     * @param fedoraIdentifier the resource identifier.
271     * @param inputStream the inputstream.
272     * @return an RdfStream of the resource triples.
273     */
274    private static RdfStream parseRdf(final FedoraId fedoraIdentifier, final InputStream inputStream) {
275        final Model model = createDefaultModel();
276        RDFDataMgr.read(model, inputStream, getRdfFormat().getLang());
277        final FedoraId topic = (fedoraIdentifier.isDescription() ? fedoraIdentifier.asBaseId() : fedoraIdentifier);
278        return DefaultRdfStream.fromModel(createURI(topic.getFullId()), model);
279    }
280}