001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.persistence.ocfl.impl;
007
008import static org.apache.jena.graph.NodeFactory.createURI;
009import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel;
010import static org.fcrepo.kernel.api.RdfLexicon.NON_RDF_SOURCE;
011import static org.fcrepo.persistence.ocfl.impl.OcflPersistentStorageUtils.getRdfFormat;
012import static org.slf4j.LoggerFactory.getLogger;
013
014import java.io.IOException;
015import java.io.InputStream;
016import java.util.ArrayList;
017import java.util.List;
018import java.util.Optional;
019import java.util.concurrent.Callable;
020import java.util.concurrent.atomic.AtomicReference;
021
022import javax.inject.Inject;
023import javax.validation.constraints.NotNull;
024
025import edu.wisc.library.ocfl.api.OcflRepository;
026import org.fcrepo.config.FedoraPropsConfig;
027import org.fcrepo.kernel.api.ContainmentIndex;
028import org.fcrepo.kernel.api.RdfLexicon;
029import org.fcrepo.kernel.api.RdfStream;
030import org.fcrepo.kernel.api.Transaction;
031import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
032import org.fcrepo.kernel.api.identifiers.FedoraId;
033import org.fcrepo.kernel.api.models.ResourceHeaders;
034import org.fcrepo.kernel.api.rdf.DefaultRdfStream;
035import org.fcrepo.kernel.api.services.MembershipService;
036import org.fcrepo.kernel.api.services.ReferenceService;
037import org.fcrepo.persistence.api.PersistentStorageSessionManager;
038import org.fcrepo.persistence.api.exceptions.ObjectExistsInOcflIndexException;
039import org.fcrepo.persistence.ocfl.api.FedoraOcflMappingNotFoundException;
040import org.fcrepo.persistence.ocfl.api.FedoraToOcflObjectIndex;
041import org.fcrepo.search.api.Condition;
042import org.fcrepo.search.api.InvalidQueryException;
043import org.fcrepo.search.api.SearchIndex;
044import org.fcrepo.search.api.SearchParameters;
045import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
046import org.fcrepo.storage.ocfl.validation.ObjectValidator;
047
048import org.apache.jena.rdf.model.Model;
049import org.apache.jena.riot.RDFDataMgr;
050import org.slf4j.Logger;
051import org.springframework.beans.factory.annotation.Autowired;
052import org.springframework.beans.factory.annotation.Qualifier;
053import org.springframework.stereotype.Component;
054
055/**
056 * Service that does the reindexing for one OCFL object.
057 * @author whikloj
058 */
059@Component
060public class ReindexService {
061
062    @Inject
063    private PersistentStorageSessionManager persistentStorageSessionManager;
064
065    @Inject
066    private OcflObjectSessionFactory ocflObjectSessionFactory;
067
068    @Autowired
069    @Qualifier("ocflIndex")
070    private FedoraToOcflObjectIndex ocflIndex;
071
072    @Autowired
073    private OcflRepository ocflRepository;
074
075    @Autowired
076    @Qualifier("containmentIndex")
077    private ContainmentIndex containmentIndex;
078
079    @Autowired
080    @Qualifier("searchIndex")
081    private SearchIndex searchIndex;
082
083    @Autowired
084    @Qualifier("referenceService")
085    private ReferenceService referenceService;
086
087    @Inject
088    private MembershipService membershipService;
089
090    @Inject
091    private ObjectValidator objectValidator;
092
093    @Inject
094    private FedoraPropsConfig config;
095
096    private static final Logger LOGGER = getLogger(ReindexService.class);
097
098    private int membershipPageSize = 500;
099
100    public void indexOcflObject(final Transaction tx, final String ocflId) {
101        LOGGER.debug("Indexing ocflId {} in transaction {}", ocflId, tx.getId());
102
103        ocflRepository.invalidateCache(ocflId);
104        if (config.isRebuildValidation()) {
105            objectValidator.validate(ocflId, config.isRebuildFixityCheck());
106        }
107
108        try (final var session = ocflObjectSessionFactory.newSession(ocflId)) {
109            final var rootId = new AtomicReference<FedoraId>();
110            final var fedoraIds = new ArrayList<FedoraId>();
111            final var headersList = new ArrayList<ResourceHeaders>();
112
113            session.invalidateCache(ocflId);
114            session.streamResourceHeaders().forEach(storageHeaders -> {
115                final var headers = new ResourceHeadersAdapter(storageHeaders);
116
117                final var fedoraId = headers.getId();
118
119                if (config.isRebuildContinue()) {
120                    try {
121                        ocflIndex.getMapping(tx, fedoraId);
122                        // We got the mapping, so we can skip this resource.
123                        throw new ObjectExistsInOcflIndexException(
124                                String.format("Skipping indexing of %s in transaction %s, because" +
125                                " it already exists in the index.", fedoraId, tx.getId())
126                        );
127                    } catch (FedoraOcflMappingNotFoundException e) {
128                        LOGGER.debug("Indexing object {} in transaction {}, because it does not yet exist in the " +
129                                "index.", fedoraId, tx.getId());
130                    }
131                }
132
133                fedoraIds.add(fedoraId);
134                if (headers.isArchivalGroup() || headers.isObjectRoot()) {
135                    rootId.set(fedoraId);
136                }
137
138                if (!fedoraId.isRepositoryRoot()) {
139                    var parentId = headers.getParent();
140
141                    if (headers.getParent() == null) {
142                        if (headers.isObjectRoot()) {
143                            parentId = FedoraId.getRepositoryRootId();
144                        } else {
145                            throw new IllegalStateException(
146                                    String.format("Resource %s must have a parent defined", fedoraId.getFullId()));
147                        }
148                    }
149                    final var created = headers.getCreatedDate();
150                    if (!headers.isDeleted()) {
151                        if (!headers.getInteractionModel().equals(NON_RDF_SOURCE.toString())) {
152                            final Optional<InputStream> content = session.readContent(fedoraId.getFullId())
153                                    .getContentStream();
154                            if (content.isPresent()) {
155                                try (final var stream = content.get()) {
156                                    final RdfStream rdf = parseRdf(fedoraId, stream);
157                                    this.referenceService.updateReferences(tx, fedoraId, null, rdf);
158                                } catch (final IOException e) {
159                                    LOGGER.warn("Content stream for {} closed prematurely, inbound references skipped.",
160                                            fedoraId.getFullId());
161                                    throw new RepositoryRuntimeException(e.getMessage(), e);
162                                }
163                            }
164                        }
165
166                        this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, null);
167                        headersList.add(headers.asKernelHeaders());
168                    } else {
169                        final var deleted = headers.getLastModifiedDate();
170                        this.containmentIndex.addContainedBy(tx, parentId, fedoraId, created, deleted);
171                    }
172                }
173            });
174
175            if (rootId.get() == null) {
176                throw new IllegalStateException(String.format("Failed to find the root resource in object " +
177                        "identified by %s. Please ensure that the object ID you are attempting to index " +
178                        "refers to a corresponding valid Fedora-flavored object in the OCFL repository. Additionally " +
179                        "be sure that the object ID corresponds with the object root resource (as opposed to child " +
180                        "resources within the object).", ocflId));
181            }
182
183            fedoraIds.forEach(fedoraIdentifier -> {
184                final var rootFedoraIdentifier = rootId.get();
185                ocflIndex.addMapping(tx, fedoraIdentifier, rootFedoraIdentifier, ocflId);
186                LOGGER.debug("Rebuilt fedora-to-ocfl object index entry for {}", fedoraIdentifier);
187            });
188
189            headersList.forEach(headers -> {
190                searchIndex.addUpdateIndex(tx, headers);
191                LOGGER.debug("Rebuilt searchIndex for {}", headers.getId());
192            });
193        }
194    }
195
196    /**
197     * Remove persistent sessions for a transaction to avoid memory leaks.
198     * @param transactionId the transaction id.
199     */
200    public void cleanupSession(final String transactionId) {
201        persistentStorageSessionManager.removeSession(transactionId);
202    }
203
204    /**
205     * Set the membership page size.
206     * @param pageSize the new page size.
207     */
208    public void setMembershipPageSize(final int pageSize) {
209        membershipPageSize = pageSize;
210    }
211
212    /**
213     * Reset all the indexes.
214     */
215    public void reset() {
216        ocflIndex.reset();
217        containmentIndex.reset();
218        searchIndex.reset();
219        referenceService.reset();
220        membershipService.reset();
221    }
222
223    /**
224     * Index all membership properties by querying for Direct containers, and then
225     * trying population of the membership index for each one
226     * @param transaction the transaction id.
227     */
228    public void indexMembership(final Transaction transaction) {
229        LOGGER.debug("Starting indexMembership for transaction {}", transaction);
230        final var fields = List.of(Condition.Field.FEDORA_ID);
231        final var conditions = List.of(Condition.fromEnums(Condition.Field.RDF_TYPE, Condition.Operator.EQ,
232                RdfLexicon.DIRECT_CONTAINER.getURI()));
233        int offset = 0;
234
235        try {
236            int numResults;
237            do {
238                final var params = new SearchParameters(fields, conditions, membershipPageSize,
239                        offset, Condition.Field.FEDORA_ID, "asc", false);
240
241                final var searchResult = searchIndex.doSearch(params);
242                final var resultList = searchResult.getItems();
243                numResults = resultList.size();
244
245                resultList.stream()
246                        .map(entry -> FedoraId.create((String) entry.get(Condition.Field.FEDORA_ID.toString())))
247                        .forEach(containerId -> membershipService.populateMembershipHistory(transaction, containerId));
248
249                // Results are paged, so step through pages until we reach the last one
250                offset += membershipPageSize;
251            } while (numResults == membershipPageSize);
252
253        } catch (final InvalidQueryException e) {
254            throw new RepositoryRuntimeException("Failed to repopulate membership history", e);
255        }
256        LOGGER.debug("Finished indexMembership for transaction {}", transaction);
257    }
258
259    /**
260     * Rollback changes in the transaction.
261     * @param tx the transaction
262     */
263    public void rollbackMembership(@NotNull final Transaction tx) {
264        execQuietly("Failed to rollback membership index transaction " + tx.getId(), () -> {
265            membershipService.rollbackTransaction(tx);
266            return null;
267        });
268    }
269
270    /**
271     * Executes the closure, capturing all exceptions, and logging them as errors.
272     *
273     * @param failureMessage what to print if the closure fails
274     * @param callable closure to execute
275     */
276    private void execQuietly(final String failureMessage, final Callable<Void> callable) {
277        try {
278            callable.call();
279        } catch (final Exception e) {
280            LOGGER.error(failureMessage, e);
281        }
282    }
283
284    /**
285     * Parse the inputstream from a Rdf resource to a RDFstream.
286     *
287     * @param fedoraIdentifier the resource identifier.
288     * @param inputStream the inputstream.
289     * @return an RdfStream of the resource triples.
290     */
291    private static RdfStream parseRdf(final FedoraId fedoraIdentifier, final InputStream inputStream) {
292        final Model model = createDefaultModel();
293        RDFDataMgr.read(model, inputStream, getRdfFormat().getLang());
294        final FedoraId topic = (fedoraIdentifier.isDescription() ? fedoraIdentifier.asBaseId() : fedoraIdentifier);
295        return DefaultRdfStream.fromModel(createURI(topic.getFullId()), model);
296    }
297}