001/*
002 * The contents of this file are subject to the license and copyright
003 * detailed in the LICENSE and NOTICE files at the root of the source
004 * tree.
005 */
006package org.fcrepo.kernel.api.identifiers;
007
008import static org.fcrepo.kernel.api.FedoraTypes.FCR_ACL;
009import static org.fcrepo.kernel.api.FedoraTypes.FCR_METADATA;
010import static org.fcrepo.kernel.api.FedoraTypes.FCR_TOMBSTONE;
011import static org.fcrepo.kernel.api.FedoraTypes.FCR_VERSIONS;
012import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_ID_PREFIX;
013import static org.fcrepo.kernel.api.services.VersionService.MEMENTO_LABEL_FORMATTER;
014
015import java.time.Instant;
016import java.time.format.DateTimeParseException;
017import java.util.Arrays;
018import java.util.Objects;
019import java.util.Set;
020import java.util.regex.Pattern;
021import java.util.stream.Collectors;
022
023import org.fcrepo.kernel.api.exception.InvalidMementoPathException;
024import org.fcrepo.kernel.api.exception.InvalidResourceIdentifierException;
025
026import org.apache.commons.lang3.StringUtils;
027
028import com.fasterxml.jackson.annotation.JsonCreator;
029import com.fasterxml.jackson.annotation.JsonValue;
030import com.google.common.escape.Escaper;
031import com.google.common.net.PercentEscaper;
032
033/**
034 * Class to store contextual information about a Fedora ID.
035 *
036 * Differentiates between the original ID of the request and the actual resource we are operating on.
037 *
038 * Resource Id : the shortened ID of the base resource, mostly needed to access the correct persistence object.
039 * fullId : the full ID from the request, used in most cases.
040 *
041 * So a fullId of info:fedora/object1/another/fcr:versions/20000101121212 has an id of info:fedora/object1/another
042 *
043 * @author whikloj
044 * @since 6.0.0
045 */
046public class FedoraId {
047
048    /**
049     * These are strings that can cause problems with our storage layout
050     */
051    private static final Set<String> FORBIDDEN_ID_PART_STRINGS = Set.of(
052            "fcr-root",
053            ".fcrepo",
054            "fcr-container.nt"
055    );
056    private static final Set<String> FORBIDDEN_ID_PART_SUFFIXES = Set.of(
057            "~fcr-desc",
058            "~fcr-acl",
059            "~fcr-desc.nt",
060            "~fcr-acl.nt"
061    );
062
063    /**
064     * The Fedora ID with prefix and extensions. eg info:fedora/object1/another/fcr:versions/20000101121212
065     */
066    private final String fullId;
067
068    /**
069     * The Fedora ID with prefix but without extensions. eg info:fedora/object1/another
070     */
071    private final String baseId;
072
073    /**
074     * The Fedora ID without prefix but with extensions. eg /object1/another/fcr:versions/20000101121212
075     */
076    private final String fullPath;
077
078    /**
079     * The Fedora ID prefix and extensions URL encoded.
080     */
081    private final String encodedFullId;
082
083    /**
084     * The Full ID of the described resource. For binary -> binary, container -> container, binary description -> binary
085     */
086    private final String describedId;
087
088
089    private String hashUri;
090    private boolean isRepositoryRoot = false;
091    private boolean isNonRdfSourceDescription = false;
092    private boolean isAcl = false;
093    private boolean isMemento = false;
094    private boolean isTimemap = false;
095    private boolean isTombstone = false;
096    private Instant mementoDatetime;
097    private String mementoDatetimeStr;
098
099    private final static Set<Pattern> extensions = Set.of(FCR_TOMBSTONE, FCR_METADATA, FCR_ACL, FCR_VERSIONS)
100            .stream().map(Pattern::compile).collect(Collectors.toSet());
101
102    private final static Escaper fedoraIdEscaper = new PercentEscaper("-._~!$'()*,;&=@:+/?#", false);
103
104    /**
105     * Basic constructor.
106     * @param fullId The full identifier or null if root.
107     * @throws IllegalArgumentException If ID does not start with expected prefix.
108     */
109    private FedoraId(final String fullId) {
110        this.fullId = ensurePrefix(fullId).replaceAll("/+$", "");
111        // Carry the path of the request for any exceptions.
112        this.fullPath = this.fullId.substring(FEDORA_ID_PREFIX.length());
113        checkForInvalidPath();
114        this.baseId = processIdentifier();
115        enforceStorageLayoutNamingConstraints();
116        this.encodedFullId = fedoraIdEscaper.escape(this.fullId);
117        this.describedId = this.fullId.replace("/" + FCR_METADATA, "");
118    }
119
120    /**
121     * Static create method
122     * @param additions One or more strings to build an ID.
123     * @return The FedoraId.
124     */
125    @JsonCreator
126    public static FedoraId create(final String... additions) {
127        return new FedoraId(idBuilder(additions));
128    }
129
130    /**
131     * Get a FedoraId for repository root.
132     * @return The FedoraId for repository root.
133     */
134    public static FedoraId getRepositoryRootId() {
135        return new FedoraId(null);
136    }
137
138    /**
139     * Is the identifier for the repository root.
140     * @return true of id is equal to info:fedora/
141     */
142    public boolean isRepositoryRoot() {
143        return isRepositoryRoot;
144    }
145
146    /**
147     * Is the identifier for a Memento?
148     * @return true if the id is for the fcr:versions endpoint and has a memento datetime string after it.
149     */
150    public boolean isMemento() {
151        return isMemento;
152    }
153
154    /**
155     * Is the identifier for an ACL?
156     * @return true if the id is for the fcr:acl endpoint.
157     */
158    public boolean isAcl() {
159        return isAcl;
160    }
161
162    /**
163     * Is the identifier for a timemap?
164     * @return true if id for the fcr:versions endpoint and NOT a memento.
165     */
166    public boolean isTimemap() {
167        return isTimemap;
168    }
169
170    /**
171     * Is the identifier for a nonRdfSourceDescription?
172     * @return true if id for the fcr:metadata endpoint
173     */
174    public boolean isDescription() {
175        return isNonRdfSourceDescription;
176    }
177
178    /**
179     * Is the identifier for a tombstone
180     * @return true if id for the fcr:tombstone endpoint
181     */
182    public boolean isTombstone() {
183        return isTombstone;
184    }
185
186    /**
187     * Is the identifier for a hash uri?
188     * @return true if full id referenced a hash uri.
189     */
190    public boolean isHashUri() {
191        return hashUri != null;
192    }
193
194    /**
195     * Get the hash uri.
196     * @return the hash uri from the id or null if none.
197     */
198    public String getHashUri() {
199        return hashUri;
200    }
201
202    /**
203     * Returns the ID string for the physical resource the Fedora ID describes. In most cases, this ID is the same as
204     * the full resource ID. However, if the resource is a memento, timemap, or tombstone, then the ID returned here
205     * will be for the resource that contains it. Here are some examples:
206     *
207     * <ul>
208     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
209     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another/fcr:metadata"</li>
210     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
211     * </ul>
212     *
213     * @return the ID of the associated physical resource
214     */
215    public String getResourceId() {
216        if (isNonRdfSourceDescription) {
217            return baseId + "/" + FCR_METADATA;
218        } else if (isAcl) {
219            return baseId + "/" + FCR_ACL;
220        }
221        return baseId;
222    }
223
224    /**
225     * Behaves the same as {@link #getResourceId()} except it returns a FedoraId rather than a String.
226     *
227     * @return the ID of the associated physical resource
228     */
229    public FedoraId asResourceId() {
230        return FedoraId.create(getResourceId());
231    }
232
233    /**
234     * Returns the ID string for the base ID the Fedora ID describes. This value is the equivalent of the full ID
235     * with all extensions removed.
236     *
237     * <ul>
238     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
239     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another"</li>
240     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
241     * </ul>
242     *
243     * @return the ID of the associated base resource
244     */
245    public String getBaseId() {
246        return baseId;
247    }
248
249    /**
250     * Behaves the same as {@link #getBaseId()} except it returns a FedoraId rather than a String.
251     *
252     * @return the ID of the associated base resource
253     */
254    public FedoraId asBaseId() {
255        return FedoraId.create(getBaseId());
256    }
257
258    /**
259     * Return the original full ID.
260     * @return the id.
261     */
262    public String getFullId() {
263        return fullId;
264    }
265
266    /**
267     * Return the original full ID without the info:fedora prefix.
268     * @return the full id path part
269     */
270    public String getFullIdPath() {
271        return fullPath;
272    }
273
274    /**
275     * @return The encoded full ID.
276     */
277    public String getEncodedFullId() {
278        return encodedFullId;
279    }
280
281    /**
282     * Return the Memento datetime as Instant.
283     * @return The datetime or null if not a memento.
284     */
285    public Instant getMementoInstant() {
286        return mementoDatetime;
287    }
288
289    /**
290     * Return the Memento datetime string.
291     * @return The yyyymmddhhiiss memento datetime or null if not a Memento.
292     */
293    public String getMementoString() {
294        return mementoDatetimeStr;
295    }
296
297    /**
298     * Creates a new Fedora ID by joining the base ID of this Fedora ID with the specified string part. Any extensions
299     * that this Fedora ID contains are discarded. For example:
300     * <p>
301     * Resolving "child" against "info:fedora/object1/another/fcr:versions/20000101121212" yields
302     * "info:fedora/object1/another/child".
303     *
304     * @param child the part to join
305     * @return new Fedora ID in the form baseId/child
306     */
307    public FedoraId resolve(final String child) {
308        if (StringUtils.isBlank(child)) {
309            throw new IllegalArgumentException("Child cannot be blank");
310        }
311        return FedoraId.create(baseId, child);
312    }
313
314    /**
315     * Creates a new Fedora ID based on this ID that points to an ACL resource. The base ID, full ID without extensions,
316     * is always used to construct an ACL ID. If this ID is already an ACL, then it returns itself.
317     *
318     * @return ACL resource ID
319     */
320    public FedoraId asAcl() {
321        if (isAcl()) {
322            return this;
323        }
324
325        return FedoraId.create(getBaseId(), FCR_ACL);
326    }
327
328    /**
329     * Creates a new Fedora ID based on this ID that points to a binary description resource. There is no guarantee that
330     * the binary description resource exists. If this ID is already a description, then it returns itself. Otherwise,
331     * it uses the base ID, without extensions, to construct the new ID. If this Fedora ID is a timemap or memento or
332     * a hash uri, then these extensions are applied to new description ID as well.
333     *
334     * @return description resource ID
335     */
336    public FedoraId asDescription() {
337        if (isDescription()) {
338            return this;
339        }
340
341        if (isTimemap()) {
342            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
343        }
344
345        if (isMemento()) {
346            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(getMementoString()));
347        }
348
349        return FedoraId.create(getBaseId(), appendHashIfPresent(FCR_METADATA));
350    }
351
352    /**
353     * Returns the FullId of the described resource.
354     *
355     * @return The ID.
356     */
357    public String getFullDescribedId() {
358        return describedId;
359    }
360
361    /**
362     * Creates a new Fedora ID based on this ID that points to a tombstone resource. If this ID is already a tombstone,
363     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID.
364     *
365     * @return tombstone resource ID
366     */
367    public FedoraId asTombstone() {
368        if (isTombstone()) {
369            return this;
370        }
371
372        return FedoraId.create(getBaseId(), FCR_TOMBSTONE);
373    }
374
375    /**
376     * Creates a new Fedora ID based on this ID that points to a timemap resource. If this ID is already a timemap,
377     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID. Unless
378     * this ID is a binary description, in which case the new ID is constructed using the full ID.
379     *
380     * @return timemap resource ID
381     */
382    public FedoraId asTimemap() {
383        if (isTimemap()) {
384            return this;
385        }
386
387        if (isDescription()) {
388            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
389        }
390
391        return FedoraId.create(getBaseId(), FCR_VERSIONS);
392    }
393
394    /**
395     * Creates a new Fedora ID based on this ID that points to a memento resource. If this ID is already a memento,
396     * then it returns itself. If this ID is an ACL, tombstone, or timemap, then the new ID is constructed using this
397     * ID's base ID. Otherwise, the full ID is used.
398     *
399     * @param mementoInstant memento representation
400     * @return memento resource ID
401     */
402    public FedoraId asMemento(final Instant mementoInstant) {
403        return asMemento(MEMENTO_LABEL_FORMATTER.format(mementoInstant));
404    }
405
406    /**
407     * Creates a new Fedora ID based on this ID that points to a memento resource. If this ID is already a memento,
408     * then it returns itself. If this ID is an ACL, tombstone, or timemap, then the new ID is constructed using this
409     * ID's base ID. If this ID is a description, then the new ID is appended to the description ID.
410     *
411     * @param mementoString string memento representation
412     * @return memento resource ID
413     */
414    public FedoraId asMemento(final String mementoString) {
415        if (isMemento()) {
416            return this;
417        }
418
419        if (isDescription()) {
420            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(mementoString));
421        }
422
423        if (isAcl() || isTombstone() || isTimemap()) {
424            return FedoraId.create(getBaseId(), FCR_VERSIONS, mementoString);
425        }
426
427        return FedoraId.create(getBaseId(), FCR_VERSIONS, appendHashIfPresent(mementoString));
428    }
429
430    @Override
431    public boolean equals(final Object obj) {
432        if (obj == this) {
433            return true;
434        }
435
436        if (!(obj instanceof FedoraId)) {
437            return false;
438        }
439
440        final var testObj = (FedoraId) obj;
441        return Objects.equals(testObj.getFullId(), this.getFullId());
442    }
443
444    @Override
445    public int hashCode() {
446        return getFullId().hashCode();
447    }
448
449    @JsonValue
450    @Override
451    public String toString() {
452        return getFullId();
453    }
454
455    /**
456     * Concatenates all the parts with slashes
457     * @param parts array of strings
458     * @return the concatenated string.
459     */
460    private static String idBuilder(final String... parts) {
461        if (parts != null && parts.length > 0) {
462            return Arrays.stream(parts).filter(Objects::nonNull)
463                    .map(s -> s.startsWith("/") ? s.substring(1) : s)
464                    .map(s -> s.endsWith("/") ? s.substring(0, s.length() - 1 ) : s)
465                    .collect(Collectors.joining("/"));
466        }
467        return "";
468    }
469
470    /**
471     * Ensure the ID has the info:fedora/ prefix.
472     * @param id the identifier, if null assume repository root (info:fedora/)
473     * @return the identifier with the info:fedora/ prefix.
474     */
475    private static String ensurePrefix(final String id) {
476        if (id == null) {
477            return FEDORA_ID_PREFIX;
478        }
479        return id.startsWith(FEDORA_ID_PREFIX) ? id : FEDORA_ID_PREFIX + "/" + id;
480    }
481
482    /**
483     * Process the original ID into its parts without using a regular expression.
484     */
485    private String processIdentifier() {
486        // Regex pattern which decomposes a http resource uri into components
487        // The first group determines if it is an fcr:metadata non-rdf source.
488        // The second group determines if the path is for a memento or timemap.
489        // The third group allows for a memento identifier.
490        // The fourth group for allows ACL.
491        // The fifth group allows for any hashed suffixes.
492        // ".*?(/" + FCR_METADATA + ")?(/" + FCR_VERSIONS + "(/\\d{14})?)?(/" + FCR_ACL + ")?(\\#\\S+)?$");
493        if (this.fullId.contains("//")) {
494            throw new InvalidResourceIdentifierException(String.format("Path contains empty element! %s", fullPath));
495        }
496        String processID = this.fullId;
497        if (processID.equals(FEDORA_ID_PREFIX)) {
498            this.isRepositoryRoot = true;
499            return this.fullId;
500        }
501        if (processID.contains("#")) {
502            final String[] hashSplits = StringUtils.splitPreserveAllTokens(processID, "#");
503            if (hashSplits.length > 2) {
504                throw new InvalidResourceIdentifierException(String.format(
505                        "Path <%s> is invalid. It may not contain more than one #",
506                        fullPath));
507            }
508            this.hashUri = hashSplits[1];
509            processID = hashSplits[0];
510        }
511        if (processID.contains(FCR_TOMBSTONE)) {
512            processID = removePart(processID, FCR_TOMBSTONE);
513            this.isTombstone = true;
514        }
515        if (processID.contains(FCR_ACL)) {
516            processID = removePart(processID, FCR_ACL);
517            this.isAcl = true;
518        }
519        if (processID.contains(FCR_VERSIONS)) {
520            final String[] versionSplits = split(processID, FCR_VERSIONS);
521            if (versionSplits.length > 2) {
522                throw new InvalidResourceIdentifierException(String.format(
523                        "Path <%s> is invalid. May not contain multiple %s parts.",
524                        fullPath, FCR_VERSIONS));
525            } else if (versionSplits.length == 2 && versionSplits[1].isEmpty()) {
526                this.isTimemap = true;
527            } else {
528                final String afterVersion = versionSplits[1];
529                if (afterVersion.matches("/\\d{14}")) {
530                    this.isMemento = true;
531                    this.mementoDatetimeStr = afterVersion.substring(1);
532                    try {
533                        this.mementoDatetime = Instant.from(MEMENTO_LABEL_FORMATTER.parse(this.mementoDatetimeStr));
534                    } catch (final DateTimeParseException e) {
535                        throw new InvalidMementoPathException(String.format("Invalid request for memento at %s",
536                                fullPath));
537                    }
538                } else if (afterVersion.equals("/")) {
539                    // Possible trailing slash?
540                    this.isTimemap = true;
541                } else {
542                    throw new InvalidMementoPathException(String.format("Invalid request for memento at %s", fullPath));
543                }
544            }
545            processID = versionSplits[0];
546        }
547        if (processID.contains(FCR_METADATA)) {
548            processID = removePart(processID, FCR_METADATA);
549            this.isNonRdfSourceDescription = true;
550        }
551        if (processID.endsWith("/")) {
552            processID = processID.replaceAll("/+$", "");
553        }
554
555        return processID;
556    }
557
558    private String removePart(final String original, final String part) {
559        final String[] split = split(original, part);
560        if (split.length > 2 || (split.length == 2 && !split[1].isEmpty())) {
561            throw new InvalidResourceIdentifierException("Path is invalid:" + fullPath);
562        }
563        return split[0];
564    }
565
566    private String[] split(final String original, final String part) {
567        return StringUtils.splitByWholeSeparatorPreserveAllTokens(original, "/" + part);
568    }
569
570    /**
571     * Check for obvious path errors.
572     */
573    private void checkForInvalidPath() {
574        // Check for combinations of endpoints not allowed.
575        if (
576            // ID contains fcr:acl or fcr:tombstone AND fcr:metadata or fcr:versions
577            ((this.fullId.contains(FCR_ACL) || this.fullId.contains(FCR_TOMBSTONE)) &&
578                (this.fullId.contains(FCR_METADATA) || this.fullId.contains(FCR_VERSIONS))) ||
579            // or ID contains fcr:acl AND fcr:tombstone
580            (this.fullId.contains(FCR_TOMBSTONE) && this.fullId.contains(FCR_ACL))
581        ) {
582            throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
583        }
584        // Ensure we don't have 2 of any of the extensions, ie. info:fedora/object/fcr:acl/fcr:acl, etc.
585        for (final Pattern extension : extensions) {
586            if (extension.matcher(this.fullId).results().count() > 1) {
587                throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
588            }
589        }
590    }
591
592    /**
593     * Ensures that the Fedora ID does not violate any naming restrictions that are in place prevent collisions on disk.
594     * These restrictions are based on the following naming conventions:
595     *      https://wiki.lyrasis.org/display/FF/Design+-+Fedora+OCFL+Object+Structure
596     *
597     * All ids should be validated on resource creation
598     */
599    private void enforceStorageLayoutNamingConstraints() {
600        final var finalPart = StringUtils.substringAfterLast(baseId, "/");
601
602        if (FORBIDDEN_ID_PART_STRINGS.contains(finalPart)) {
603            throw new InvalidResourceIdentifierException(
604                    String.format("Invalid resource ID. IDs may not contain the string '%s'.", finalPart));
605        }
606
607        FORBIDDEN_ID_PART_SUFFIXES.forEach(suffix -> {
608            if (finalPart.endsWith(suffix) && !finalPart.equals(suffix)) {
609                throw new InvalidResourceIdentifierException(
610                        String.format("Invalid resource ID. IDs may not end with '%s'.", suffix));
611            }
612        });
613    }
614
615    private String appendHashIfPresent(final String original) {
616        if (isHashUri()) {
617            return original + "#" + getHashUri();
618        }
619        return original;
620    }
621
622}