/*
 * Copyright 2015 DuraSpace, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
016package org.fcrepo.migration.urlmappers;
017
018import org.fcrepo.migration.ExternalContentURLMapper;
019import org.fcrepo.migration.MigrationIDMapper;
020
021import java.util.ArrayList;
022import java.util.List;
023import java.util.regex.Matcher;
024import java.util.regex.Pattern;
025
026/**
027 * An ExternalContentURLMapper implementation that updates redirects that point to the
028 * fedora repository in which they originated to the destination of that pointed-to resource
029 * in the fedora 4 repository to which the content is being migrated.
030 *
031 * For example, if "http://localhost:8080/fedora/objects/object:1/datastreams/POLICY" was a
032 * redirect datastream in fedora 3 that redirected to
033 * "http://localhost:8080/fedora/objects/policy:1/datastreams/XACML/content", this class would
034 * supply the URL for the content of the migrated XACML datastream on the migrated policy:1
035 * object.
036 *
037 * @author Mike Durbin
038 */
039public class SelfReferencingURLMapper implements ExternalContentURLMapper {
040
041    private static final String OLD_DS_CONTENT_URL_PATTERN = "http://{local-fedora-server}/fedora/get/([^/]+)/(.+)";
042    private static final String NEW_DS_CONTENT_URL_PATTERN
043            = "http://{local-fedora-server}/fedora/objects/([^/]+)/datastreams/([^/]+)/content";
044
045    private List<Pattern> contentPatterns;
046
047    /**
048     * A pattern that is compared after the content patterns, and if it matches,
049     * an exception is thrown.  This is implemented to allow an error to be thrown
050     * if any unmatched URLs that reference the fedora 3 repository are found; a
051     * case that generally indicates a configuration error in the migration scenario.
052     */
053    private Pattern invalidPattern;
054
055    private MigrationIDMapper idMapper;
056
057    /**
058     * Basic constructor.
059     * @param localFedoraServer the domain and port for the server that hosted the fedora objects in the format
060     *                          "localhost:8080".
061     * @param idMapper the MigrationIDMapper used for the current migration scenario
062     */
063    public SelfReferencingURLMapper(final String localFedoraServer, final MigrationIDMapper idMapper) {
064        this.contentPatterns = new ArrayList<>();
065        this.contentPatterns.add(parsePattern(OLD_DS_CONTENT_URL_PATTERN, localFedoraServer));
066        this.contentPatterns.add(parsePattern(NEW_DS_CONTENT_URL_PATTERN, localFedoraServer));
067        this.idMapper = idMapper;
068
069        this.invalidPattern = parsePattern("http://{local-fedora-server}/fedora/.*", localFedoraServer);
070    }
071
072    private Pattern parsePattern(final String pattern, final String localFedoraServer) {
073        return Pattern.compile(pattern.replace("{local-fedora-server}", localFedoraServer));
074    }
075
076    @Override
077    public String mapURL(final String url) {
078        for (Pattern p : contentPatterns) {
079            final Matcher m = p.matcher(url);
080            if (m.matches()) {
081                return idMapper.getBaseURL() + idMapper.mapDatastreamPath(m.group(1), m.group(2));
082            }
083        }
084        if (invalidPattern.matcher(url).matches()) {
085            throw new IllegalArgumentException("Unhandled internal external fedora 3 URL. (" + url + ")");
086        }
087        return url;
088    }
089}