/*
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree.
 */
package org.fcrepo.kernel.api.identifiers;

import static org.fcrepo.kernel.api.FedoraTypes.FCR_ACL;
import static org.fcrepo.kernel.api.FedoraTypes.FCR_METADATA;
import static org.fcrepo.kernel.api.FedoraTypes.FCR_TOMBSTONE;
import static org.fcrepo.kernel.api.FedoraTypes.FCR_VERSIONS;
import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_ID_PREFIX;
import static org.fcrepo.kernel.api.services.VersionService.MEMENTO_LABEL_FORMATTER;

import java.time.Instant;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.fcrepo.kernel.api.exception.InvalidMementoPathException;
import org.fcrepo.kernel.api.exception.InvalidResourceIdentifierException;

import org.apache.commons.lang3.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;

/**
 * Class to store contextual information about a Fedora ID.
 *
 * Differentiates between the original ID of the request and the actual resource we are operating on.
 *
 * Resource Id : the shortened ID of the base resource, mostly needed to access the correct persistence object.
 * fullId : the full ID from the request, used in most cases.
 *
 * So a fullId of info:fedora/object1/another/fcr:versions/20000101121212 has an id of info:fedora/object1/another
 *
 * @author whikloj
 * @since 6.0.0
 */
public class FedoraId {

    /**
     * These are strings that can cause problems with our storage layout
     */
    private static final Set<String> FORBIDDEN_ID_PART_STRINGS = Set.of(
            "fcr-root",
            ".fcrepo",
            "fcr-container.nt"
    );
    private static final Set<String> FORBIDDEN_ID_PART_SUFFIXES = Set.of(
            "~fcr-desc",
            "~fcr-acl",
            "~fcr-desc.nt",
            "~fcr-acl.nt"
    );

    /**
     * One compiled pattern per extension, used to detect an extension that occurs more than once in an ID.
     */
    private static final Set<Pattern> EXTENSIONS = Set.of(FCR_TOMBSTONE, FCR_METADATA, FCR_ACL, FCR_VERSIONS)
            .stream().map(Pattern::compile).collect(Collectors.toUnmodifiableSet());

    /**
     * Escaper used to build the encoded form of the full ID.
     */
    private static final Escaper FEDORA_ID_ESCAPER = new PercentEscaper("-._~!$'()*,;&=@:+/?#", false);

    /**
     * One or more slashes at the end of an ID. Compiled once rather than on every construction.
     */
    private static final Pattern TRAILING_SLASHES = Pattern.compile("/+$");

    /**
     * What must follow the fcr:versions part for the ID to be a memento: a slash and 14 digits.
     */
    private static final Pattern MEMENTO_SUFFIX = Pattern.compile("/\\d{14}");

    /**
     * The Fedora ID with prefix and extensions. eg info:fedora/object1/another/fcr:versions/20000101121212
     */
    private final String fullId;

    /**
     * The Fedora ID with prefix but without extensions. eg info:fedora/object1/another
     */
    private final String baseId;

    /**
     * The Fedora ID without prefix but with extensions. eg /object1/another/fcr:versions/20000101121212
     */
    private final String fullPath;

    /**
     * The Fedora ID prefix and extensions URL encoded.
     */
    private final String encodedFullId;

    /**
     * The Full ID of the described resource. For binary -> binary, container -> container,
     * binary description -> binary
     */
    private final String describedId;

    // The fields below are populated by processIdentifier() while the constructor runs and never change afterwards.
    private String hashUri;
    private boolean isRepositoryRoot = false;
    private boolean isNonRdfSourceDescription = false;
    private boolean isAcl = false;
    private boolean isMemento = false;
    private boolean isTimemap = false;
    private boolean isTombstone = false;
    private Instant mementoDatetime;
    private String mementoDatetimeStr;

    /**
     * Basic constructor.
     * @param fullId The full identifier or null if root.
     * @throws InvalidResourceIdentifierException If the ID combines or repeats extensions in a way that is not
     *         allowed, contains an empty path element, or violates the storage layout naming constraints.
     * @throws InvalidMementoPathException If the part following fcr:versions is not a valid memento datetime.
     */
    private FedoraId(final String fullId) {
        this.fullId = TRAILING_SLASHES.matcher(ensurePrefix(fullId)).replaceAll("");
        // Carry the path of the request for any exceptions.
        this.fullPath = this.fullId.substring(FEDORA_ID_PREFIX.length());
        checkForInvalidPath();
        this.baseId = processIdentifier();
        enforceStorageLayoutNamingConstraints();
        this.encodedFullId = FEDORA_ID_ESCAPER.escape(this.fullId);
        this.describedId = this.fullId.replace("/" + FCR_METADATA, "");
    }

    /**
     * Static create method
     * @param additions One or more strings to build an ID.
     * @return The FedoraId.
     */
    @JsonCreator
    public static FedoraId create(final String... additions) {
        return new FedoraId(idBuilder(additions));
    }

    /**
     * Get a FedoraId for repository root.
     * @return The FedoraId for repository root.
     */
    public static FedoraId getRepositoryRootId() {
        return new FedoraId(null);
    }

    /**
     * Is the identifier for the repository root.
     * @return true if the id is equal to the Fedora ID prefix, i.e. it has no path.
     */
    public boolean isRepositoryRoot() {
        return isRepositoryRoot;
    }

    /**
     * Is the identifier for a Memento?
     * @return true if the id is for the fcr:versions endpoint and has a memento datetime string after it.
     */
    public boolean isMemento() {
        return isMemento;
    }

    /**
     * Is the identifier for an ACL?
     * @return true if the id is for the fcr:acl endpoint.
     */
    public boolean isAcl() {
        return isAcl;
    }

    /**
     * Is the identifier for a timemap?
     * @return true if id for the fcr:versions endpoint and NOT a memento.
     */
    public boolean isTimemap() {
        return isTimemap;
    }

    /**
     * Is the identifier for a nonRdfSourceDescription?
     * @return true if id for the fcr:metadata endpoint
     */
    public boolean isDescription() {
        return isNonRdfSourceDescription;
    }

    /**
     * Is the identifier for a tombstone
     * @return true if id for the fcr:tombstone endpoint
     */
    public boolean isTombstone() {
        return isTombstone;
    }

    /**
     * Is the identifier for a hash uri?
     * @return true if full id referenced a hash uri.
     */
    public boolean isHashUri() {
        return hashUri != null;
    }

    /**
     * Get the hash uri.
     * @return the hash uri from the id or null if none.
     */
    public String getHashUri() {
        return hashUri;
    }

    /**
     * Returns the ID string for the physical resource the Fedora ID describes. In most cases, this ID is the same as
     * the full resource ID. However, if the resource is a memento, timemap, or tombstone, then the ID returned here
     * will be for the resource that contains it. Here are some examples:
     *
     * <ul>
     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another/fcr:metadata"</li>
     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
     * </ul>
     *
     * @return the ID of the associated physical resource
     */
    public String getResourceId() {
        if (isNonRdfSourceDescription) {
            return baseId + "/" + FCR_METADATA;
        } else if (isAcl) {
            return baseId + "/" + FCR_ACL;
        }
        return baseId;
    }

    /**
     * Behaves the same as {@link #getResourceId()} except it returns a FedoraId rather than a String.
     *
     * @return the ID of the associated physical resource
     */
    public FedoraId asResourceId() {
        return FedoraId.create(getResourceId());
    }

    /**
     * Returns the ID string for the base ID the Fedora ID describes. This value is the equivalent of the full ID
     * with all extensions removed.
     *
     * <ul>
     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another"</li>
     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
     * </ul>
     *
     * @return the ID of the associated base resource
     */
    public String getBaseId() {
        return baseId;
    }

    /**
     * Behaves the same as {@link #getBaseId()} except it returns a FedoraId rather than a String.
     *
     * @return the ID of the associated base resource
     */
    public FedoraId asBaseId() {
        return FedoraId.create(getBaseId());
    }

    /**
     * Return the original full ID.
     * @return the id.
     */
    public String getFullId() {
        return fullId;
    }

    /**
     * Return the original full ID without the info:fedora prefix.
     * @return the full id path part
     */
    public String getFullIdPath() {
        return fullPath;
    }

    /**
     * @return The encoded full ID.
     */
    public String getEncodedFullId() {
        return encodedFullId;
    }

    /**
     * Return the Memento datetime as Instant.
     * @return The datetime or null if not a memento.
     */
    public Instant getMementoInstant() {
        return mementoDatetime;
    }

    /**
     * Return the Memento datetime string.
     * @return The yyyymmddhhiiss memento datetime or null if not a Memento.
     */
    public String getMementoString() {
        return mementoDatetimeStr;
    }

    /**
     * Creates a new Fedora ID by joining the base ID of this Fedora ID with the specified string part. Any extensions
     * that this Fedora ID contains are discarded. For example:
     * <p>
     * Resolving "child" against "info:fedora/object1/another/fcr:versions/20000101121212" yields
     * "info:fedora/object1/another/child".
     *
     * @param child the part to join
     * @return new Fedora ID in the form baseId/child
     * @throws IllegalArgumentException if child is null, empty or only whitespace
     */
    public FedoraId resolve(final String child) {
        if (StringUtils.isBlank(child)) {
            throw new IllegalArgumentException("Child cannot be blank");
        }
        return FedoraId.create(baseId, child);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to an ACL resource. The base ID, full ID without extensions,
     * is always used to construct an ACL ID. If this ID is already an ACL, then it returns itself.
     *
     * @return ACL resource ID
     */
    public FedoraId asAcl() {
        if (isAcl()) {
            return this;
        }

        return FedoraId.create(getBaseId(), FCR_ACL);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a binary description resource. There is no guarantee that
     * the binary description resource exists. If this ID is already a description, then it returns itself. Otherwise,
     * it uses the base ID, without extensions, to construct the new ID. If this Fedora ID is a timemap or a memento,
     * the fcr:versions extension (and memento datetime) is applied to the new description ID as well. A hash uri is
     * carried over for mementos and plain resources, but not for timemaps.
     *
     * @return description resource ID
     */
    public FedoraId asDescription() {
        if (isDescription()) {
            return this;
        }

        if (isTimemap()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
        }

        if (isMemento()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(getMementoString()));
        }

        return FedoraId.create(getBaseId(), appendHashIfPresent(FCR_METADATA));
    }

    /**
     * Returns the FullId of the described resource.
     *
     * @return The ID.
     */
    public String getFullDescribedId() {
        return describedId;
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a tombstone resource. If this ID is already a tombstone,
     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID.
     *
     * @return tombstone resource ID
     */
    public FedoraId asTombstone() {
        if (isTombstone()) {
            return this;
        }

        return FedoraId.create(getBaseId(), FCR_TOMBSTONE);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a timemap resource. If this ID is already a timemap,
     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID. Unless
     * this ID is a binary description, in which case the timemap is that of the description.
     *
     * @return timemap resource ID
     */
    public FedoraId asTimemap() {
        if (isTimemap()) {
            return this;
        }

        if (isDescription()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
        }

        return FedoraId.create(getBaseId(), FCR_VERSIONS);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a memento resource. The instant is formatted as a
     * memento label and handed to {@link #asMemento(String)}, so the same rules apply: if this ID is already a
     * memento, then it returns itself. If this ID is an ACL, tombstone, or timemap, then the new ID is constructed
     * using this ID's base ID. If this ID is a description, then the memento is that of the description.
     *
     * @param mementoInstant memento representation
     * @return memento resource ID
     */
    public FedoraId asMemento(final Instant mementoInstant) {
        return asMemento(MEMENTO_LABEL_FORMATTER.format(mementoInstant));
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a memento resource. If this ID is already a memento,
     * then it returns itself. If this ID is an ACL, tombstone, or timemap, then the new ID is constructed using this
     * ID's base ID. If this ID is a description, then the new ID is appended to the description ID.
     *
     * @param mementoString string memento representation
     * @return memento resource ID
     */
    public FedoraId asMemento(final String mementoString) {
        if (isMemento()) {
            return this;
        }

        // Checked before the timemap case so that a description's timemap yields a description memento.
        if (isDescription()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(mementoString));
        }

        if (isAcl() || isTombstone() || isTimemap()) {
            return FedoraId.create(getBaseId(), FCR_VERSIONS, mementoString);
        }

        return FedoraId.create(getBaseId(), FCR_VERSIONS, appendHashIfPresent(mementoString));
    }

    /**
     * Two Fedora IDs are equal when their full IDs are equal.
     */
    @Override
    public boolean equals(final Object obj) {
        if (obj == this) {
            return true;
        }

        if (!(obj instanceof FedoraId)) {
            return false;
        }

        final var testObj = (FedoraId) obj;
        return Objects.equals(testObj.getFullId(), this.getFullId());
    }

    @Override
    public int hashCode() {
        return getFullId().hashCode();
    }

    /**
     * @return the full ID; this is also the value used for JSON serialization.
     */
    @JsonValue
    @Override
    public String toString() {
        return getFullId();
    }

    /**
     * Concatenates all the non-null parts with slashes. A single leading and a single trailing slash is removed
     * from each part before joining.
     * @param parts array of strings
     * @return the concatenated string, or an empty string if there are no parts.
     */
    private static String idBuilder(final String... parts) {
        if (parts != null && parts.length > 0) {
            return Arrays.stream(parts).filter(Objects::nonNull)
                    .map(s -> s.startsWith("/") ? s.substring(1) : s)
                    .map(s -> s.endsWith("/") ? s.substring(0, s.length() - 1 ) : s)
                    .collect(Collectors.joining("/"));
        }
        return "";
    }

    /**
     * Ensure the ID starts with the Fedora ID prefix.
     * @param id the identifier, if null assume repository root
     * @return the identifier, prepended with the prefix and a slash if it did not already start with the prefix.
     */
    private static String ensurePrefix(final String id) {
        if (id == null) {
            return FEDORA_ID_PREFIX;
        }
        return id.startsWith(FEDORA_ID_PREFIX) ? id : FEDORA_ID_PREFIX + "/" + id;
    }

    /**
     * Process the full ID into its parts: sets the hash uri and the repository root / tombstone / ACL / timemap /
     * memento / description flags as a side effect and returns what is left once the hash and all extensions have
     * been removed.
     *
     * The extensions are stripped from the end of the ID inwards, accepting the shape
     * {@code <base>(/fcr:metadata)?(/fcr:versions(/<14 digits>)?)?(/fcr:acl)?(#<hash>)?}. A tombstone is handled
     * like an ACL.
     *
     * @return the base ID
     * @throws InvalidResourceIdentifierException if the ID has an empty path element, more than one #, or an
     *         extension in a position where it is not allowed.
     * @throws InvalidMementoPathException if the part following fcr:versions is not a valid memento datetime.
     */
    private String processIdentifier() {
        if (this.fullId.contains("//")) {
            throw new InvalidResourceIdentifierException(String.format("Path contains empty element! %s", fullPath));
        }
        if (this.fullId.equals(FEDORA_ID_PREFIX)) {
            this.isRepositoryRoot = true;
            return this.fullId;
        }
        String processID = this.fullId;
        if (processID.contains("#")) {
            final String[] hashSplits = StringUtils.splitPreserveAllTokens(processID, "#");
            if (hashSplits.length > 2) {
                throw new InvalidResourceIdentifierException(String.format(
                        "Path <%s> is invalid. It may not contain more than one #",
                        fullPath));
            }
            this.hashUri = hashSplits[1];
            processID = hashSplits[0];
        }
        // NOTE(review): for tombstone, ACL and metadata an extension name that is not preceded by a slash
        // (eg "xfcr:acl") still sets the flag while leaving the ID untouched - confirm whether that is intended.
        if (processID.contains(FCR_TOMBSTONE)) {
            processID = removePart(processID, FCR_TOMBSTONE);
            this.isTombstone = true;
        }
        if (processID.contains(FCR_ACL)) {
            processID = removePart(processID, FCR_ACL);
            this.isAcl = true;
        }
        if (processID.contains(FCR_VERSIONS)) {
            processID = processVersions(processID);
        }
        if (processID.contains(FCR_METADATA)) {
            processID = removePart(processID, FCR_METADATA);
            this.isNonRdfSourceDescription = true;
        }
        if (processID.endsWith("/")) {
            processID = TRAILING_SLASHES.matcher(processID).replaceAll("");
        }

        return processID;
    }

    /**
     * Handles the fcr:versions part of an ID: decides whether the ID is a timemap or a memento, records the memento
     * datetime, and returns the ID with the versions part (and anything after it) removed.
     *
     * @param id an ID, without hash, that contains the fcr:versions string
     * @return the part of the ID before /fcr:versions
     * @throws InvalidResourceIdentifierException if fcr:versions occurs more than once, or is not the start of a
     *         path segment.
     * @throws InvalidMementoPathException if what follows fcr:versions is not a valid memento datetime.
     */
    private String processVersions(final String id) {
        final String[] versionSplits = split(id, FCR_VERSIONS);
        if (versionSplits.length > 2) {
            throw new InvalidResourceIdentifierException(String.format(
                    "Path <%s> is invalid. May not contain multiple %s parts.",
                    fullPath, FCR_VERSIONS));
        }
        if (versionSplits.length < 2) {
            // The caller only checked for the extension name, but the split is on "/" + name. An ID such as
            // "info:fedora/objfcr:versions" therefore does not split at all; without this guard reading
            // versionSplits[1] below fails with an ArrayIndexOutOfBoundsException.
            throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
        }

        final String afterVersion = versionSplits[1];
        if (afterVersion.isEmpty() || afterVersion.equals("/")) {
            // Nothing, or just a trailing slash, after fcr:versions.
            this.isTimemap = true;
        } else if (MEMENTO_SUFFIX.matcher(afterVersion).matches()) {
            this.isMemento = true;
            this.mementoDatetimeStr = afterVersion.substring(1);
            try {
                this.mementoDatetime = Instant.from(MEMENTO_LABEL_FORMATTER.parse(this.mementoDatetimeStr));
            } catch (final DateTimeParseException e) {
                throw new InvalidMementoPathException(String.format("Invalid request for memento at %s",
                        fullPath));
            }
        } else {
            throw new InvalidMementoPathException(String.format("Invalid request for memento at %s", fullPath));
        }
        return versionSplits[0];
    }

    /**
     * Removes a trailing "/part" from an ID.
     *
     * @param original the ID to remove the part from
     * @param part the extension to remove
     * @return everything before "/part", or the original if it does not contain "/part"
     * @throws InvalidResourceIdentifierException if "/part" occurs more than once or is followed by anything.
     */
    private String removePart(final String original, final String part) {
        final String[] split = split(original, part);
        if (split.length > 2 || (split.length == 2 && !split[1].isEmpty())) {
            throw new InvalidResourceIdentifierException("Path is invalid:" + fullPath);
        }
        return split[0];
    }

    /**
     * Splits an ID around every occurrence of "/part", keeping empty tokens.
     *
     * @param original the ID to split
     * @param part the extension to split on
     * @return the pieces around "/part"
     */
    private String[] split(final String original, final String part) {
        return StringUtils.splitByWholeSeparatorPreserveAllTokens(original, "/" + part);
    }

    /**
     * Check for obvious path errors.
     *
     * @throws InvalidResourceIdentifierException if the ID combines extensions that may not be combined, or
     *         contains the same extension more than once.
     */
    private void checkForInvalidPath() {
        final boolean hasAcl = this.fullId.contains(FCR_ACL);
        final boolean hasTombstone = this.fullId.contains(FCR_TOMBSTONE);
        final boolean hasMetadata = this.fullId.contains(FCR_METADATA);
        final boolean hasVersions = this.fullId.contains(FCR_VERSIONS);

        // Check for combinations of endpoints not allowed:
        // fcr:acl or fcr:tombstone together with fcr:metadata or fcr:versions, or fcr:acl together with fcr:tombstone.
        if (((hasAcl || hasTombstone) && (hasMetadata || hasVersions)) || (hasTombstone && hasAcl)) {
            throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
        }
        // Ensure we don't have 2 of any of the extensions, ie. info:fedora/object/fcr:acl/fcr:acl, etc.
        for (final Pattern extension : EXTENSIONS) {
            if (extension.matcher(this.fullId).results().count() > 1) {
                throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
            }
        }
    }

    /**
     * Ensures that the Fedora ID does not violate any naming restrictions that are in place prevent collisions on disk.
     * These restrictions are based on the following naming conventions:
     * https://wiki.lyrasis.org/display/FF/Design+-+Fedora+OCFL+Object+Structure
     *
     * All ids should be validated on resource creation
     *
     * @throws InvalidResourceIdentifierException if the last path part of the base ID is a forbidden string, or
     *         ends with (but is not equal to) a forbidden suffix.
     */
    private void enforceStorageLayoutNamingConstraints() {
        final var finalPart = StringUtils.substringAfterLast(baseId, "/");

        if (FORBIDDEN_ID_PART_STRINGS.contains(finalPart)) {
            throw new InvalidResourceIdentifierException(
                    String.format("Invalid resource ID. IDs may not contain the string '%s'.", finalPart));
        }

        for (final String suffix : FORBIDDEN_ID_PART_SUFFIXES) {
            if (finalPart.endsWith(suffix) && !finalPart.equals(suffix)) {
                throw new InvalidResourceIdentifierException(
                        String.format("Invalid resource ID. IDs may not end with '%s'.", suffix));
            }
        }
    }

    /**
     * Appends "#" and this ID's hash uri to the given string, if this ID has a hash uri.
     *
     * @param original the string to append to
     * @return the string with the hash appended, or the string unchanged if this ID has no hash uri.
     */
    private String appendHashIfPresent(final String original) {
        if (isHashUri()) {
            return original + "#" + getHashUri();
        }
        return original;
    }

}