001/*
002 * Copyright 2019 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.fcrepo.migration.handlers.ocfl;
018
019import at.favre.lib.bytes.Bytes;
020import com.google.common.base.Preconditions;
021import com.google.common.base.Strings;
022import com.google.common.collect.Sets;
023import org.apache.commons.codec.digest.DigestUtils;
024import org.apache.commons.io.IOUtils;
025import org.apache.commons.lang3.StringUtils;
026import org.apache.jena.datatypes.xsd.XSDDatatype;
027import org.apache.jena.rdf.model.Model;
028import org.apache.jena.rdf.model.ModelFactory;
029import org.apache.jena.riot.Lang;
030import org.apache.jena.riot.RDFDataMgr;
031import org.apache.tika.config.TikaConfig;
032import org.apache.tika.detect.Detector;
033import org.apache.tika.io.TikaInputStream;
034import org.apache.tika.metadata.Metadata;
035import org.apache.tika.mime.MimeType;
036import org.apache.tika.mime.MimeTypeException;
037import org.apache.tika.mime.MimeTypes;
038import org.fcrepo.migration.ContentDigest;
039import org.fcrepo.migration.DatastreamVersion;
040import org.fcrepo.migration.FedoraObjectVersionHandler;
041import org.fcrepo.migration.MigrationType;
042import org.fcrepo.migration.ObjectInfo;
043import org.fcrepo.migration.ObjectVersionReference;
044import org.fcrepo.migration.ResourceMigrationType;
045import org.fcrepo.storage.ocfl.InteractionModel;
046import org.fcrepo.storage.ocfl.OcflObjectSession;
047import org.fcrepo.storage.ocfl.OcflObjectSessionFactory;
048import org.fcrepo.storage.ocfl.ResourceHeaders;
049import org.fcrepo.storage.ocfl.ResourceHeadersVersion;
050import org.fcrepo.storage.ocfl.exception.NotFoundException;
051import org.slf4j.Logger;
052
053import java.io.BufferedInputStream;
054import java.io.ByteArrayInputStream;
055import java.io.ByteArrayOutputStream;
056import java.io.IOException;
057import java.io.InputStream;
058import java.io.UncheckedIOException;
059import java.net.URI;
060import java.nio.charset.StandardCharsets;
061import java.nio.file.Files;
062import java.security.DigestInputStream;
063import java.security.MessageDigest;
064import java.security.NoSuchAlgorithmException;
065import java.time.Instant;
066import java.time.OffsetDateTime;
067import java.time.ZoneOffset;
068import java.util.ArrayList;
069import java.util.HashMap;
070import java.util.HashSet;
071import java.util.Map;
072import java.util.Set;
073import java.util.concurrent.atomic.AtomicBoolean;
074
075import static org.slf4j.LoggerFactory.getLogger;
076
077/**
078 * Writes a Fedora object as a single ArchiveGroup.
079 * <p>
080 * All datastreams and object metadata from a fcrepo3 object are persisted to a
081 * single OCFL object (ArchiveGroup in fcrepo6 parlance).
082 * </p>
083 * <p>
084 * The contents of each datastream are written verbatim. No attempt is made to
085 * re-write the RELS-EXT to replace subjects and objects with their LDP
086 * counterparts.
087 * </p>
 * <p>
 * Note: Fedora-specific OCFL serialization features (such as redirects,
 * container metadata, etc.) are not fully defined yet, so they are not included here.
 * </p>
 *
092 * @author apb@jhu.edu
093 */
094public class ArchiveGroupHandler implements FedoraObjectVersionHandler {
095
    private static final Logger LOGGER = getLogger(ArchiveGroupHandler.class);

    // root "parent" id assigned to top-level migrated objects
    private static final String FCREPO_ROOT = "info:fedora/";
    // suffix appended to a binary's id to form the id of its description (metadata) resource
    private static final String FCRMETA_SUFFIX = "/fcr:metadata";

    // maps fcrepo3 external ("E") and redirect ("R") control groups to fcrepo6 external handling values
    private static final Map<String, String> externalHandlingMap = Map.of(
            "E", "proxy",
            "R", "redirect"
    );

    // fcrepo3 control group code for inline XML datastreams (their recorded size is not persisted)
    private static final String INLINE_XML = "X";

    // fcrepo3 datastream state codes
    private static final String DS_INACTIVE = "I";
    private static final String DS_DELETED = "D";

    // fcrepo3 RDF predicate holding the object's state
    private static final String OBJ_STATE_PROP = "info:fedora/fedora-system:def/model#state";
    // fcrepo3 RDF predicate (from RELS-INT) declaring a datastream's download filename
    private static final String DOWNLOAD_NAME_PROP = "info:fedora/fedora-system:def/model#downloadFilename";
    // fcrepo3 object state values
    private static final String OBJ_INACTIVE = "Inactive";
    private static final String OBJ_DELETED = "Deleted";

    // reserved datastream ids carrying object-level and datastream-level relationship triples
    private static final String RELS_EXT = "RELS-EXT";
    private static final String RELS_INT = "RELS-INT";

    private final OcflObjectSessionFactory sessionFactory;
    private final boolean addDatastreamExtensions;
    private final boolean deleteInactive;
    private final boolean foxmlFile;
    private final MigrationType migrationType;
    private final ResourceMigrationType resourceMigrationType;
    private final String user;
    private final String idPrefix;
    private final Detector mimeDetector;
    private final boolean headOnly;
    private final boolean disableChecksumValidation;
130
131    /**
132     * Create an ArchiveGroupHandler,
133     *
134     * @param sessionFactory
135     *        OCFL session factory
136     * @param migrationType
137     *        the type of migration to do
138     * @param resourceMigrationType
139     *        how resources should be migrated
140     * @param addDatastreamExtensions
141     *        true if datastreams should be written with file extensions
142     * @param deleteInactive
143     *        true if inactive objects and datastreams should be migrated as deleted
144     * @param foxmlFile
145     *        true if foxml file should be migrated as a whole file, instead of creating property files
146     * @param user
147     *        the username to associated with the migrated resources
148     * @param idPrefix
149     *        the prefix to add to the Fedora 3 pid (default "info:fedora/", like Fedora 3)
150     * @param headOnly
151     *        flag to enable head only migrations
152     * @param disableChecksumValidation
153     */
154    public ArchiveGroupHandler(final OcflObjectSessionFactory sessionFactory,
155                               final MigrationType migrationType,
156                               final ResourceMigrationType resourceMigrationType,
157                               final boolean addDatastreamExtensions,
158                               final boolean deleteInactive,
159                               final boolean foxmlFile,
160                               final String user,
161                               final String idPrefix,
162                               final boolean headOnly,
163                               final boolean disableChecksumValidation) {
164        this.sessionFactory = Preconditions.checkNotNull(sessionFactory, "sessionFactory cannot be null");
165        this.migrationType = Preconditions.checkNotNull(migrationType, "migrationType cannot be null");
166        this.resourceMigrationType = Preconditions.checkNotNull(resourceMigrationType,
167                "resourceMigrationType cannot be null");
168        this.addDatastreamExtensions = addDatastreamExtensions;
169        this.deleteInactive = deleteInactive;
170        this.foxmlFile = foxmlFile;
171        this.user = Preconditions.checkNotNull(Strings.emptyToNull(user), "user cannot be blank");
172        this.idPrefix = idPrefix;
173        this.headOnly = headOnly;
174        this.disableChecksumValidation = disableChecksumValidation;
175        try {
176            this.mimeDetector = new TikaConfig().getDetector();
177        } catch (Exception e) {
178            throw new RuntimeException(e);
179        }
180    }
181
    /**
     * Migrates all versions of a single fcrepo3 object into OCFL. The object's PID, combined
     * with the configured id prefix, identifies the target OCFL object. For each fcrepo3
     * version, changed datastream content and (unless migrating the raw FOXML) the derived RDF
     * metadata are written; RELS-EXT triples update the object container and RELS-INT triples
     * update per-datastream descriptions and download filenames. Deletions implied by object or
     * datastream state are applied afterwards via handleDeletedResources.
     *
     * @param versions the fcrepo3 object versions, in order; the first version triggers
     *        one-time object-level setup (FOXML file or object triples)
     * @param objectInfo identifying info (pid, foxml path) for the object being migrated
     */
    @Override
    public void processObjectVersions(final Iterable<ObjectVersionReference> versions, final ObjectInfo objectInfo) {
        // We use the PID to identify the OCFL object
        final String objectId = objectInfo.getPid();
        final String f6ObjectId = idPrefix + objectId;

        // We need to manually keep track of the datastream creation dates
        final Map<String, String> dsCreateDates = new HashMap<>();

        String objectState = null;
        OffsetDateTime objectCreation = null;
        OcflObjectSession objectSession = null;

        final Map<String, String> datastreamStates = new HashMap<>();
        // tracks the triples used to create containers and binary descriptions
        final Map<String, MetaHolder> metaMap = new HashMap<>();
        // tracks info about binary resources needed to construct filenames
        final Map<String, BinaryMeta> binaryMeta = new HashMap<>();
        // tracks filenames pulled from RELS-INT
        final Map<String, String> filenameMap = new HashMap<>();

        for (var ov : versions) {
            // tracks the binary descriptions that need to be written
            final Set<String> toWrite = new HashSet<>();
            // tracks the binaries that need their filename updated based on RELS-INT
            final Set<String> relsFilenameUpdates = new HashSet<>();
            // tracks the binaries that need their filename updated based on a RELS-INT removal
            final Map<String, String> relsDeletedFilenames = new HashMap<>();

            // reuse the objectSession when headOnly is set
            objectSession = (objectSession == null || !headOnly) ? newSession(f6ObjectId) : objectSession;

            if (ov.isFirstVersion()) {
                if (objectSession.containsResource(f6ObjectId)) {
                    throw new RuntimeException(f6ObjectId + " already exists!");
                }
                objectCreation = OffsetDateTime.parse(ov.getVersionDate());
                objectState = getObjectState(ov, objectId);
                // Object properties are written only once (as fcrepo3 object properties were unversioned).
                if (foxmlFile) {
                    try (InputStream is = new BufferedInputStream(Files.newInputStream(objectInfo.getFoxmlPath()))) {
                        final var foxmlDsId = f6ObjectId + "/FOXML";
                        final var headers = createHeaders(foxmlDsId, f6ObjectId,
                                InteractionModel.NON_RDF).build();
                        objectSession.writeResource(headers, is);
                        //mark FOXML as a deleted datastream so it gets deleted in handleDeletedResources()
                        datastreamStates.put(foxmlDsId, DS_DELETED);
                    } catch (IOException io) {
                        LOGGER.error("error writing " + objectId + " FOXML file to " + f6ObjectId + ": " + io);
                        throw new UncheckedIOException(io);
                    }
                } else {
                    final var objectHeaders = createObjectHeaders(f6ObjectId, ov);
                    final var content = getObjTriples(ov, objectId);
                    final var meta = MetaHolder.fromContent(content, objectHeaders);
                    metaMap.put(f6ObjectId, meta);
                    objectSession.writeResource(meta.headers.build(), meta.constructTriples());
                }
            }

            final var datastreamSessions = new HashMap<String, OcflObjectSession>();

            // Write datastreams and their metadata
            for (var dv : ov.listChangedDatastreams()) {
                final var mimeType = resolveMimeType(dv);
                final String dsId = dv.getDatastreamInfo().getDatastreamId();
                final String f6DsId = resolveF6DatastreamId(dsId, f6ObjectId);
                final var datastreamFilename = lastPartFromId(f6DsId);

                final var datastreamSession = datastreamSession(f6DsId, objectSession);
                datastreamSessions.putIfAbsent(f6DsId, datastreamSession);

                if (dv.isFirstVersionIn(ov.getObject())) {
                    dsCreateDates.put(dsId, dv.getCreated());
                    datastreamStates.put(f6DsId, dv.getDatastreamInfo().getState());
                }

                final var createDate = dsCreateDates.get(dsId);

                final var filename = resolveFilename(datastreamFilename,
                        dv.getLabel(), filenameMap.get(f6DsId), mimeType);

                // this version rewrites the datastream's headers with a freshly resolved
                // filename, so any pending RELS-INT filename removal no longer applies
                relsDeletedFilenames.remove(f6DsId);

                final var datastreamHeaders = createDatastreamHeaders(dv, f6DsId, f6ObjectId,
                        filename, mimeType, createDate);

                binaryMeta.put(f6DsId, new BinaryMeta(datastreamFilename, mimeType, dv.getLabel()));

                if (externalHandlingMap.containsKey(dv.getDatastreamInfo().getControlGroup())) {
                    InputStream content = null;
                    // for plain OCFL migrations, write a file containing the external/redirect URL
                    if (migrationType == MigrationType.PLAIN_OCFL) {
                        content = IOUtils.toInputStream(dv.getExternalOrRedirectURL(), StandardCharsets.UTF_8);
                    }
                    datastreamSession.writeResource(datastreamHeaders, content);
                } else {
                    try (var contentStream = dv.getContent()) {
                        writeDatastreamContent(dv, datastreamHeaders, contentStream, datastreamSession);
                    } catch (final IOException e) {
                        throw new UncheckedIOException(e);
                    }
                }

                if (!foxmlFile) {
                    final var f6DescId = f6DescriptionId(f6DsId);
                    final var descriptionHeaders = createDescriptionHeaders(f6DsId,
                            datastreamHeaders);
                    final var descriptionTriples = getDsTriples(dv, f6DsId, createDate);
                    metaMap.computeIfAbsent(f6DescId, k -> new MetaHolder())
                            .setHeaders(descriptionHeaders)
                            .setContentTriples(descriptionTriples);
                    toWrite.add(f6DescId);

                    // RELS-EXT triples apply to the object container; RELS-INT triples are
                    // split per referenced datastream description
                    if (RELS_EXT.equals(dsId) || RELS_INT.equals(dsId)) {
                        final var triples = parseRdfXml(dv);
                        if (RELS_EXT.equals(dsId)) {
                            metaMap.get(f6ObjectId).setRelsTriples(triples);
                            toWrite.add(f6ObjectId);
                        } else {
                            final Map<String, Model> splitModels = splitRelsInt(triples);
                            final var oldIds = new HashSet<>(filenameMap.keySet());
                            filenameMap.clear();

                            splitModels.forEach((id, model) -> {
                                final var descId = f6DescriptionId(id);
                                metaMap.computeIfAbsent(descId, k -> new MetaHolder())
                                        .setRelsTriples(model);
                                toWrite.add(descId);

                                // Check to see if there are any file names that need updated
                                for (final var it = model.listStatements(); it.hasNext(); ) {
                                    final var statement = it.next();
                                    if (DOWNLOAD_NAME_PROP.equals(statement.getPredicate().getURI())) {
                                        filenameMap.put(id, statement.getObject().toString());
                                        relsFilenameUpdates.add(id);
                                        break;
                                    }
                                }
                            });

                            // The filename was set once but is no longer
                            final var deleted = Sets.difference(oldIds, filenameMap.keySet());
                            deleted.forEach(id -> {
                                final var meta = binaryMeta.get(id);
                                if (meta != null) {
                                    relsDeletedFilenames.put(id, resolveFilename(meta.name, meta.label,
                                            null, meta.mimeType));
                                }
                            });
                        }
                    }
                }
            }

            // rdf resources are written last because RELS-EXT/RELS-INT in this version may have changed them
            writeMeta(toWrite, metaMap, objectSession, datastreamSessions);
            updateFilenames(relsFilenameUpdates, filenameMap, relsDeletedFilenames, objectSession, datastreamSessions);

            if (!headOnly) {
                LOGGER.debug("Committing object <{}>", f6ObjectId);

                final var creationTimestamp = OffsetDateTime.parse(ov.getVersionDate());

                objectSession.versionCreationTimestamp(creationTimestamp);
                objectSession.commit();

                if (resourceMigrationType == ResourceMigrationType.ATOMIC) {
                    datastreamSessions.forEach((id, session) -> {
                        LOGGER.debug("Committing object <{}>", id);
                        session.versionCreationTimestamp(creationTimestamp);
                        session.commit();
                    });
                }
            }
        }

        handleDeletedResources(f6ObjectId, objectState, datastreamStates, objectSession);

        // final commit when headOnly is set
        if (headOnly && objectSession != null) {
            LOGGER.debug("Committing object <{}>", f6ObjectId);
            objectSession.versionCreationTimestamp(objectCreation);
            objectSession.commit();
        }
    }
367
368    /**
369     * Resolves the filename of the datastream based on the following precedence:
370     *
371     * 1. info:fedora/fedora-system:def/model#downloadFilename from RELS-INT
372     * 2. LABEL from datastream meta
373     * 3. Name of the datastream
374     *
375     * If extensions should be added, then an extension is picked based on the mime type. If the filename already
376     * includes a `.` then no extension is added.
377     *
378     * @param dsName the name of the datastream
379     * @param labelName the datastream's label
380     * @param downloadName the download name from RELS-INT
381     * @param mimeType the datastream's mime type
382     * @return the resolved filename
383     */
384    private String resolveFilename(final String dsName,
385                                   final String labelName,
386                                   final String downloadName,
387                                   final String mimeType) {
388        String filename;
389        if (StringUtils.isNotBlank(downloadName)) {
390            filename = downloadName;
391        } else if (StringUtils.isNotBlank(labelName)) {
392            filename = labelName;
393        } else {
394            filename = dsName;
395        }
396
397        if (addDatastreamExtensions
398                && StringUtils.isNotBlank(mimeType)
399                && !filename.contains(".")) {
400            filename += getExtension(mimeType);
401        }
402
403        return filename;
404    }
405
406    /**
407     * RDF resources are written after writing all other binaries in the version because they can be affected by
408     * RELS-INT or RELS-EXT updates.
409     *
410     * @param toWrite the set of resources that should be written to this version
411     * @param metaMap the map of all known rdf resources
412     * @param objectSession the ocfl session for the object
413     * @param datastreamSessions the ocfl sessions for the datastreams
414     */
415    private void writeMeta(final Set<String> toWrite,
416                           final Map<String, MetaHolder> metaMap,
417                           final OcflObjectSession objectSession,
418                           final Map<String, OcflObjectSession> datastreamSessions) {
419        for (final var id : toWrite) {
420            final var meta = metaMap.get(id);
421
422            if (meta.headers == null) {
423                // This only happens if there's a RELS-INT that references a datastream before it exists.
424                // Skip for now. The triples will be added once the datastream exists.
425                continue;
426            }
427
428            final var session = datastreamSessions.computeIfAbsent(id.replace(FCRMETA_SUFFIX, ""),
429                    k -> datastreamSession(k, objectSession));
430
431            // Need to copy over the memento created date from the existing headers because it may have been updated
432            // when a description's binary was updated
433            if (migrationType == MigrationType.FEDORA_OCFL) {
434                try {
435                    final var existingHeaders = session.readHeaders(id);
436                    meta.headers.withMementoCreatedDate(existingHeaders.getMementoCreatedDate());
437                } catch (NotFoundException e) {
438                    // this just means the resource hasn't been written yet
439                }
440            }
441            session.writeResource(meta.headers.build(), meta.constructTriples());
442        }
443    }
444
445    private void updateFilenames(final Set<String> toUpdate,
446                                 final Map<String, String> filenameMap,
447                                 final Map<String, String> relsDeletedFilenames,
448                                 final OcflObjectSession objectSession,
449                                 final Map<String, OcflObjectSession> datastreamSessions) {
450        if (migrationType == MigrationType.FEDORA_OCFL) {
451            toUpdate.forEach(id -> {
452                final var session = datastreamSessions.computeIfAbsent(id.replace(FCRMETA_SUFFIX, ""),
453                                                                       k -> datastreamSession(k, objectSession));
454                final var origHeaders = session.readHeaders(id);
455                final var filename = filenameMap.get(id);
456                if (StringUtils.isNotBlank(filename)) {
457                    final var newHeaders = ResourceHeaders.builder(origHeaders).withFilename(filename).build();
458                    session.writeHeaders(newHeaders);
459                }
460            });
461            relsDeletedFilenames.forEach((id, filename) -> {
462                final var session = datastreamSessions.computeIfAbsent(id.replace(FCRMETA_SUFFIX, ""),
463                                                                       k -> datastreamSession(k, objectSession));
464                final var origHeaders = session.readHeaders(id);
465                final var newHeaders = ResourceHeaders.builder(origHeaders).withFilename(filename).build();
466                session.writeHeaders(newHeaders);
467            });
468        }
469    }
470
471    private boolean fedora3DigestValid(final ContentDigest f3Digest) {
472        return f3Digest != null && StringUtils.isNotBlank(f3Digest.getType()) &&
473                StringUtils.isNotBlank(f3Digest.getDigest());
474    }
475
476    private void writeDatastreamContent(final DatastreamVersion dv,
477                                        final ResourceHeaders datastreamHeaders,
478                                        final InputStream contentStream,
479                                        final OcflObjectSession session) throws IOException {
480        if (disableChecksumValidation) {
481            session.writeResource(datastreamHeaders, contentStream);
482            return;
483        }
484        final var f3Digest = dv.getContentDigest();
485        final var ocflObjectId = session.ocflObjectId();
486        final var datastreamId = dv.getDatastreamInfo().getDatastreamId();
487        final var datastreamControlGroup = dv.getDatastreamInfo().getControlGroup();
488        if (fedora3DigestValid(f3Digest)) {
489            try {
490                final var messageDigest = MessageDigest.getInstance(f3Digest.getType());
491                if (migrationType == MigrationType.PLAIN_OCFL) {
492                    session.writeResource(datastreamHeaders, contentStream);
493                } else {
494                    try (var digestStream = new DigestInputStream(contentStream, messageDigest)) {
495                        session.writeResource(datastreamHeaders, digestStream);
496                        final var expectedDigest = f3Digest.getDigest();
497                        final var actualDigest = Bytes.wrap(digestStream.getMessageDigest().digest()).encodeHex();
498                        if (!actualDigest.equalsIgnoreCase(expectedDigest)) {
499                            final var msg = String.format("%s/%s: digest %s doesn't match expected digest %s",
500                                    ocflObjectId, datastreamId, actualDigest, expectedDigest);
501                            throw new RuntimeException(msg);
502                        }
503                    }
504                }
505            } catch (final NoSuchAlgorithmException e) {
506                final var msg = String.format("%s/%s: no digest algorithm %s. Writing resource & continuing.",
507                        ocflObjectId, datastreamId, f3Digest.getType());
508                LOGGER.warn(msg);
509                session.writeResource(datastreamHeaders, contentStream);
510            }
511        } else {
512            if (datastreamControlGroup.equalsIgnoreCase("M")) {
513                final var msg = String.format("%s/%s: missing/invalid digest. Writing resource & continuing.",
514                        ocflObjectId, datastreamId);
515                LOGGER.warn(msg);
516            }
517            session.writeResource(datastreamHeaders, contentStream);
518        }
519    }
520
    /**
     * Applies fcrepo3 deletions after all versions have been migrated. When the object itself is
     * Deleted (or Inactive with deleteInactive enabled) every tracked datastream and the object
     * resource are deleted; otherwise only the individually Deleted/Inactive datastreams are
     * removed. When not in headOnly mode, a new session is opened and committed only if
     * something was actually deleted (and aborted otherwise); in headOnly mode the caller's
     * session is reused and committed by the caller.
     *
     * @param f6ObjectId the fcrepo6 id of the object
     * @param objectState the fcrepo3 object state ("Deleted"/"Inactive"/etc), may be null
     * @param datastreamStates fcrepo3 state codes keyed by fcrepo6 datastream id
     * @param objectSession the caller's object session, reused when headOnly is set
     */
    private void handleDeletedResources(final String f6ObjectId,
                                        final String objectState,
                                        final Map<String, String> datastreamStates,
                                        final OcflObjectSession objectSession) {
        final OcflObjectSession session = headOnly ? objectSession : newSession(f6ObjectId);
        final var datastreamSessions = new HashMap<String, OcflObjectSession>();

        try {
            final var now = OffsetDateTime.now().withOffsetSameInstant(ZoneOffset.UTC);
            final var hasDeletes = new AtomicBoolean(false);

            if (OBJ_DELETED.equals(objectState) || (deleteInactive && OBJ_INACTIVE.equals(objectState))) {
                hasDeletes.set(true);

                // the whole object is deleted, so every tracked datastream goes too
                datastreamStates.keySet().forEach(f6DsId -> {
                    final var datastreamSession = datastreamSessions.computeIfAbsent(f6DsId,
                            k -> datastreamSession(f6DsId, session));
                    deleteDatastream(f6DsId, now.toInstant(), datastreamSession);
                });

                if (migrationType == MigrationType.PLAIN_OCFL) {
                    deleteOcflMigratedResource(f6ObjectId, InteractionModel.BASIC_CONTAINER, session);
                } else {
                    deleteF6MigratedResource(f6ObjectId, now.toInstant(), session);
                }
            } else {
                // object survives; delete only datastreams whose own state requires it
                datastreamStates.forEach((f6DsId, state) -> {
                    if (DS_DELETED.equals(state) || (deleteInactive && DS_INACTIVE.equals(state))) {
                        final var datastreamSession = datastreamSessions.computeIfAbsent(f6DsId,
                                k -> datastreamSession(f6DsId, session));
                        hasDeletes.set(true);
                        deleteDatastream(f6DsId, now.toInstant(), datastreamSession);
                    }
                });
            }

            if (!headOnly && hasDeletes.get()) {
                session.versionCreationTimestamp(now);
                session.commit();

                if (resourceMigrationType == ResourceMigrationType.ATOMIC) {
                    datastreamSessions.forEach((id, dsSession) -> {
                        dsSession.versionCreationTimestamp(now);
                        dsSession.commit();
                    });
                }
            } else if (!headOnly) {
                // nothing to delete: discard the empty session(s)
                session.abort();
                if (resourceMigrationType == ResourceMigrationType.ATOMIC) {
                    datastreamSessions.forEach((id, dsSession) -> {
                        dsSession.abort();
                    });
                }
            }
        } catch (RuntimeException e) {
            // NOTE(review): only the object session is aborted here; any datastream sessions
            // opened above are not aborted on failure — confirm whether they are cleaned up
            // elsewhere. Also, in headOnly mode this aborts the caller's shared session.
            session.abort();
            throw e;
        }
    }
580
581    private String f6DescriptionId(final String f6ResourceId) {
582        return f6ResourceId + FCRMETA_SUFFIX;
583    }
584
585    private String lastPartFromId(final String id) {
586        return id.substring(id.lastIndexOf('/') + 1);
587    }
588
589    private String resolveF6DatastreamId(final String datastreamId, final String f6ObjectId) {
590        return f6ObjectId + "/" + datastreamId;
591    }
592
593    private ResourceHeaders.Builder createHeaders(final String id,
594                                                  final String parentId,
595                                                  final InteractionModel model) {
596        final var headers = ResourceHeaders.builder();
597        headers.withHeadersVersion(ResourceHeadersVersion.V1_0);
598        headers.withId(id);
599        headers.withParent(parentId);
600        headers.withInteractionModel(model.getUri());
601        return headers;
602    }
603
604    private ResourceHeaders.Builder createObjectHeaders(final String f6ObjectId, final ObjectVersionReference ov) {
605        final var headers = createHeaders(f6ObjectId, FCREPO_ROOT, InteractionModel.BASIC_CONTAINER);
606        headers.withArchivalGroup(resourceMigrationType == ResourceMigrationType.ARCHIVAL);
607        headers.withObjectRoot(true);
608        headers.withLastModifiedBy(user);
609        headers.withCreatedBy(user);
610
611        ov.getObjectProperties().listProperties().forEach(p -> {
612            if (p.getName().contains("lastModifiedDate")) {
613                final var lastModified = Instant.parse(p.getValue());
614                headers.withLastModifiedDate(lastModified);
615                headers.withMementoCreatedDate(lastModified);
616                headers.withStateToken(DigestUtils.md5Hex(
617                        String.valueOf(lastModified.toEpochMilli())).toUpperCase());
618            } else if (p.getName().contains("createdDate")) {
619                headers.withCreatedDate(Instant.parse(p.getValue()));
620            }
621        });
622
623        return headers;
624    }
625
    /**
     * Builds the fcrepo6 resource headers for a single datastream version: identity, filename,
     * timestamps, external handling (for "E"/"R" control groups), content size, recorded
     * digests, mime type, and a state token derived from the modification time.
     *
     * @param dv the datastream version being written
     * @param f6DsId the fcrepo6 id of the datastream
     * @param f6ObjectId the fcrepo6 id of the owning object
     * @param filename the resolved filename for the datastream
     * @param mime the resolved mime type
     * @param createDate the datastream's creation date (from its first version)
     * @return the completed resource headers
     */
    private ResourceHeaders createDatastreamHeaders(final DatastreamVersion dv,
                                                    final String f6DsId,
                                                    final String f6ObjectId,
                                                    final String filename,
                                                    final String mime,
                                                    final String createDate) {
        // the fcrepo3 version creation time serves as the last modified time
        final var lastModified = Instant.parse(dv.getCreated());
        final var headers = createHeaders(f6DsId, f6ObjectId, InteractionModel.NON_RDF);
        if (resourceMigrationType == ResourceMigrationType.ARCHIVAL) {
            headers.withArchivalGroupId(f6ObjectId);
        }
        headers.withFilename(filename);
        headers.withCreatedDate(Instant.parse(createDate));
        headers.withLastModifiedDate(lastModified);
        headers.withLastModifiedBy(user);
        headers.withCreatedBy(user);
        headers.withMementoCreatedDate(lastModified);

        // external ("E") and redirect ("R") datastreams record a handling mode and target URL
        if (externalHandlingMap.containsKey(dv.getDatastreamInfo().getControlGroup())) {
            headers.withExternalHandling(
                    externalHandlingMap.get(dv.getDatastreamInfo().getControlGroup()));
            headers.withExternalUrl(dv.getExternalOrRedirectURL());
        }

        headers.withArchivalGroup(false);
        headers.withObjectRoot(resourceMigrationType == ResourceMigrationType.ATOMIC);
        // size is omitted for inline XML — NOTE(review): presumably because the recorded size
        // does not match the re-serialized content; confirm
        if (dv.getSize() > -1 && !INLINE_XML.equals(dv.getDatastreamInfo().getControlGroup())) {
            headers.withContentSize(dv.getSize());
        }

        if (dv.getContentDigest() != null && !Strings.isNullOrEmpty(dv.getContentDigest().getDigest())) {
            final var digest = dv.getContentDigest();
            final var digests = new ArrayList<URI>();
            digests.add(URI.create("urn:" + digest.getType().toLowerCase() + ":" + digest.getDigest().toLowerCase()));
            headers.withDigests(digests);
        }

        headers.withMimeType(mime);
        headers.withStateToken(DigestUtils.md5Hex(
                String.valueOf(lastModified.toEpochMilli())).toUpperCase());

        return headers.build();
    }
669
670    private ResourceHeaders.Builder createDescriptionHeaders(final String f6DsId,
671                                                             final ResourceHeaders datastreamHeaders) {
672        final var id = f6DescriptionId(f6DsId);
673        final var headers = createHeaders(id, f6DsId, InteractionModel.NON_RDF_DESCRIPTION);
674
675        if (resourceMigrationType == ResourceMigrationType.ARCHIVAL) {
676            headers.withArchivalGroupId(datastreamHeaders.getArchivalGroupId());
677        }
678        headers.withCreatedDate(datastreamHeaders.getCreatedDate());
679        headers.withLastModifiedDate(datastreamHeaders.getLastModifiedDate());
680        headers.withCreatedBy(datastreamHeaders.getCreatedBy());
681        headers.withLastModifiedBy(datastreamHeaders.getLastModifiedBy());
682        headers.withMementoCreatedDate(datastreamHeaders.getMementoCreatedDate());
683
684        headers.withArchivalGroup(false);
685        headers.withObjectRoot(false);
686        headers.withStateToken(datastreamHeaders.getStateToken());
687
688        return headers;
689    }
690
691    private String resolveMimeType(final DatastreamVersion dv) {
692        String mime = dv.getMimeType();
693
694        if (Strings.isNullOrEmpty(mime)) {
695            final var meta = new Metadata();
696            meta.set(Metadata.RESOURCE_NAME_KEY, dv.getDatastreamInfo().getDatastreamId());
697            try (var content = TikaInputStream.get(dv.getContent())) {
698                mime = mimeDetector.detect(content, meta).toString();
699            } catch (IOException e) {
700                throw new UncheckedIOException(e);
701            }
702        }
703
704        return mime;
705    }
706
707    private void deleteDatastream(final String id,
708                                  final Instant lastModified,
709                                  final OcflObjectSession session) {
710        if (migrationType == MigrationType.PLAIN_OCFL) {
711            deleteOcflMigratedResource(id, InteractionModel.NON_RDF, session);
712            deleteOcflMigratedResource(f6DescriptionId(id), InteractionModel.NON_RDF_DESCRIPTION, session);
713        } else {
714            deleteF6MigratedResource(id, lastModified, session);
715            deleteF6MigratedResource(f6DescriptionId(id), lastModified, session);
716        }
717    }
718
719    private void deleteF6MigratedResource(final String id,
720                                          final Instant lastModified,
721                                          final OcflObjectSession session) {
722        LOGGER.debug("Deleting resource {}", id);
723        final var headers = session.readHeaders(id);
724        session.deleteContentFile(ResourceHeaders.builder(headers)
725                .withDeleted(true)
726                .withLastModifiedDate(lastModified)
727                .withMementoCreatedDate(lastModified)
728                .build());
729    }
730
731    private void deleteOcflMigratedResource(final String id,
732                                            final InteractionModel interactionModel,
733                                            final OcflObjectSession session) {
734        LOGGER.debug("Deleting resource {}", id);
735        session.deleteContentFile(ResourceHeaders.builder()
736                .withId(id)
737                .withInteractionModel(interactionModel.getUri())
738                .build());
739    }
740
741    private String getObjectState(final ObjectVersionReference ov, final String pid) {
742        return ov.getObjectProperties().listProperties().stream()
743                .filter(prop -> OBJ_STATE_PROP.equals(prop.getName()))
744                .findFirst()
745                .orElseThrow(() -> new IllegalStateException(String.format("Object %s is missing state information",
746                        pid)))
747                .getValue();
748    }
749
750    // Get object-level triples
751    private static Model getObjTriples(final ObjectVersionReference o, final String pid) {
752        final Model triples = ModelFactory.createDefaultModel();
753        final String uri = "info:fedora/" + pid;
754
755        o.getObjectProperties().listProperties().forEach(p -> {
756            if (p.getName().contains("Date")) {
757                addDateLiteral(triples, uri, p.getName(), p.getValue());
758            } else {
759                addStringLiteral(triples, uri, p.getName(), p.getValue());
760            }
761        });
762
763        return triples;
764    }
765
766    // Get datastream-level triples
767    private Model getDsTriples(final DatastreamVersion dv,
768                                            final String f6DsId,
769                                            final String createDate) {
770        final Model triples = ModelFactory.createDefaultModel();
771
772        if (migrationType == MigrationType.PLAIN_OCFL) {
773            // These triples are server managed in F6
774            addDateLiteral(triples,
775                    f6DsId,
776                    "http://fedora.info/definitions/v4/repository#created",
777                    createDate);
778            addDateLiteral(triples,
779                    f6DsId,
780                    "http://fedora.info/definitions/v4/repository#lastModified",
781                    dv.getCreated());
782            addStringLiteral(triples,
783                    f6DsId,
784                    "http://purl.org/dc/terms/identifier",
785                    dv.getDatastreamInfo().getDatastreamId());
786            addStringLiteral(triples,
787                    f6DsId,
788                    "http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hasMimeType",
789                    dv.getMimeType());
790            addLongLiteral(triples,
791                    f6DsId,
792                    "http://www.loc.gov/premis/rdf/v1#size",
793                    dv.getSize());
794
795            if (dv.getContentDigest() != null) {
796                addStringLiteral(triples,
797                        f6DsId,
798                        "http://www.loc.gov/premis/rdf/v1#hasMessageDigest",
799                        "urn:" + dv.getContentDigest().getType().toLowerCase() + ":" +
800                                dv.getContentDigest().getDigest().toLowerCase());
801            }
802        }
803
804        addStringLiteral(triples,
805                f6DsId,
806                "http://purl.org/dc/terms/title",
807                dv.getLabel());
808        addStringLiteral(triples,
809                f6DsId,
810                "http://fedora.info/definitions/1/0/access/objState",
811                dv.getDatastreamInfo().getState());
812        addStringLiteral(triples,
813                f6DsId,
814                "http://www.loc.gov/premis/rdf/v1#formatDesignation",
815                dv.getFormatUri());
816
817        return triples;
818    }
819
820    private static void addStringLiteral(final Model m,
821                                         final String s,
822                                         final String p,
823                                         final String o) {
824        if (o != null) {
825            m.add(m.createResource(s), m.createProperty(p), o);
826        }
827    }
828
829    private static void addDateLiteral(final Model m,
830                                       final String s,
831                                       final String p,
832                                       final String date) {
833        if (date != null) {
834            m.addLiteral(m.createResource(s),
835                         m.createProperty(p),
836                         m.createTypedLiteral(date, XSDDatatype.XSDdateTime));
837        }
838    }
839
840    private static void addLongLiteral(final Model m,
841                                       final String s,
842                                       final String p,
843                                       final long number) {
844        if (number != -1) {
845            m.addLiteral(m.createResource(s),
846                    m.createProperty(p),
847                    m.createTypedLiteral(number, XSDDatatype.XSDlong));
848        }
849    }
850
851    /**
852     * @param mime any mimetype as String
853     * @return extension associated with arg mime, return includes '.' in extension (.txt).
854     *                  ..Empty String if unrecognized mime
855     */
856    private static String getExtension(final String mime) {
857        final MimeTypes allTypes = MimeTypes.getDefaultMimeTypes();
858        MimeType type;
859        try {
860            type = allTypes.forName(mime);
861        } catch (final MimeTypeException e) {
862            type = null;
863        }
864
865        if (type != null) {
866            return type.getExtension();
867        }
868
869        LOGGER.warn("No mimetype found for '{}'", mime);
870        return "";
871    }
872
873    private Model parseRdfXml(final DatastreamVersion datastreamVersion) {
874        final var model = ModelFactory.createDefaultModel();
875        try (final var is = datastreamVersion.getContent()) {
876            RDFDataMgr.read(model, is, Lang.RDFXML);
877            return model;
878        } catch (Exception e) {
879            throw new RuntimeException(String.format("Failed to parse RDF XML in %s/%s",
880                    datastreamVersion.getDatastreamInfo().getObjectInfo().getPid(),
881                    datastreamVersion.getDatastreamInfo().getDatastreamId()), e);
882        }
883    }
884
885    private Map<String, Model> splitRelsInt(final Model relsIntModel) {
886        final Map<String, Model> splitModels = new HashMap<>();
887        for (final var it = relsIntModel.listStatements(); it.hasNext();) {
888            final var statement = it.next();
889            final var id = statement.getSubject().getURI();
890            final var model = splitModels.computeIfAbsent(id, k -> ModelFactory.createDefaultModel());
891            model.add(statement);
892        }
893        return splitModels;
894    }
895
896    /**
897     * Creates a new session for the datastream when migrating as atomic resources, or returns the object session,
898     * when migrating as archival groups.
899     *
900     * @param id the datastream's id in fedora 6
901     * @param objectSession the datastream's object session
902     * @return either a new datastream session or the object session
903     */
904    private OcflObjectSession datastreamSession(final String id, final OcflObjectSession objectSession) {
905        if (resourceMigrationType == ResourceMigrationType.ARCHIVAL) {
906            return objectSession;
907        } else {
908            return newSession(id);
909        }
910    }
911
912    private OcflObjectSession newSession(final String id) {
913        return new OcflObjectSessionWrapper(sessionFactory.newSession(id));
914    }
915
916    /**
917     * Wrapper class for storing a RDF resource's "content" triples, RELS triples, and resource headers. The content
918     * triples are triples that were generated based on general Fedora metadata, and the RELS triples are extracted from
919     * one of the RELS-* files. They are maintained separately because it's possible for them to be updated
920     * independently and we need to be able to construct the correct set of triples when one changes.
921     */
922    private static class MetaHolder {
923        Model contentTriples;
924        Model relsTriples;
925        ResourceHeaders.Builder headers;
926
927        public static MetaHolder fromContent(final Model contentTriples, final ResourceHeaders.Builder headers) {
928            return new MetaHolder(contentTriples, null, headers);
929        }
930
931        private MetaHolder() {
932        }
933
934        private MetaHolder(final Model contentTriples,
935                           final Model relsTriples,
936                           final ResourceHeaders.Builder headers) {
937            this.contentTriples = contentTriples;
938            this.relsTriples = relsTriples;
939            this.headers = headers;
940        }
941
942        /**
943         * Constructs a complete set of triples at the current version of the resource and serializes them as n-triples.
944         *
945         * @return n-triples input stream
946         */
947        public InputStream constructTriples() {
948            final var output = new ByteArrayOutputStream();
949            final var triples = ModelFactory.createDefaultModel();
950
951            if (contentTriples != null) {
952                triples.add(contentTriples.listStatements());
953            }
954
955            if (relsTriples != null) {
956                triples.add(relsTriples.listStatements());
957            }
958
959            triples.write(output, Lang.NTRIPLES.getName());
960            return new ByteArrayInputStream(output.toByteArray());
961        }
962
963        public MetaHolder setHeaders(final ResourceHeaders.Builder headers) {
964            this.headers = headers;
965            return this;
966        }
967
968        public MetaHolder setContentTriples(final Model contentTriples) {
969            this.contentTriples = contentTriples;
970            return this;
971        }
972
973        public MetaHolder setRelsTriples(final Model relsTriples) {
974            this.relsTriples = relsTriples;
975            return this;
976        }
977    }
978
    /**
     * Immutable value holder for the metadata needed to describe a migrated binary:
     * its name, mime type, and label.
     */
    private static class BinaryMeta {
        // Name of the binary
        final String name;
        // Mime type of the binary
        final String mimeType;
        // Human-readable label for the binary
        final String label;

        public BinaryMeta(final String name, final String mimeType, final String label) {
            this.name = name;
            this.mimeType = mimeType;
            this.label = label;
        }
    }
990
991}