001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.camel.reindexing;
019
020import static java.net.InetAddress.getLocalHost;
021import static org.apache.camel.Exchange.CONTENT_TYPE;
022import static org.apache.camel.Exchange.HTTP_METHOD;
023import static org.apache.camel.Exchange.HTTP_RESPONSE_CODE;
024import static org.apache.camel.LoggingLevel.INFO;
025import static org.fcrepo.camel.reindexing.ReindexingHeaders.REINDEXING_RECIPIENTS;
026import static org.fcrepo.camel.reindexing.ReindexingHeaders.REINDEXING_HOST;
027import static org.fcrepo.camel.reindexing.ReindexingHeaders.REINDEXING_PORT;
028import static org.fcrepo.camel.reindexing.ReindexingHeaders.REINDEXING_PREFIX;
029import static org.fcrepo.camel.FcrepoHeaders.FCREPO_BASE_URL;
030import static org.fcrepo.camel.FcrepoHeaders.FCREPO_URI;
031import static org.fcrepo.client.HttpMethods.GET;
032import static org.slf4j.LoggerFactory.getLogger;
033
034import org.apache.camel.PropertyInject;
035import org.apache.camel.builder.RouteBuilder;
036import org.slf4j.Logger;
037
038/**
039 * A content router for handling JMS events.
040 *
041 * @author Aaron Coburn
042 */
043public class ReindexingRouter extends RouteBuilder {
044
045    private static final Logger LOGGER = getLogger(ReindexingRouter.class);
046    private static final int BAD_REQUEST = 400;
047    private static final String LDP_CONTAINS = "<http://www.w3.org/ns/ldp#contains>";
048
049    @PropertyInject(value = "rest.port", defaultValue = "9080")
050    private String port;
051
052    @PropertyInject(value = "rest.host", defaultValue = "localhost")
053    private String host;
054
055    /**
056     * Configure the message route workflow.
057     */
058    public void configure() throws Exception {
059
060        final String hostname = host.startsWith("http") ? host : "http://" + host;
061
062        /**
063         * A generic error handler (specific to this RouteBuilder)
064         */
065        onException(Exception.class)
066            .maximumRedeliveries("{{error.maxRedeliveries}}")
067            .log("Index Routing Error: ${routeId}");
068
069        /**
070         * Expose a RESTful endpoint for re-indexing
071         */
072        from("jetty:" + hostname + ":" + port + "{{rest.prefix}}?matchOnUriPrefix=true&httpMethodRestrict=GET,POST")
073            .routeId("FcrepoReindexingRest")
074            .routeDescription("Expose the reindexing endpoint over HTTP")
075            .setHeader(FCREPO_URI).simple("{{fcrepo.baseUrl}}${headers.CamelHttpPath}")
076            .choice()
077                .when(header(HTTP_METHOD).isEqualTo("GET")).to("direct:usage")
078                .otherwise().to("direct:reindex");
079
080        from("direct:usage").routeId("FcrepoReindexingUsage")
081            .setHeader(REINDEXING_PREFIX).simple("{{rest.prefix}}")
082            .setHeader(REINDEXING_PORT).simple(port)
083            .setHeader(FCREPO_BASE_URL).simple("{{fcrepo.baseUrl}}")
084            .process(exchange -> {
085                exchange.getIn().setHeader(REINDEXING_HOST, getLocalHost().getHostName());
086            })
087            .to("mustache:org/fcrepo/camel/reindexing/usage.mustache");
088
089        /**
090         * A Re-indexing endpoint, setting where in the fcrepo hierarchy
091         * a re-indexing operation should begin.
092         */
093        from("direct:reindex").routeId("FcrepoReindexingReindex")
094            .process(new RestProcessor())
095            .removeHeaders("CamelHttp*")
096            .removeHeader("JMSCorrelationID")
097            .setBody(constant(null))
098            .choice()
099                .when(header(HTTP_RESPONSE_CODE).isGreaterThanOrEqualTo(BAD_REQUEST))
100                    .endChoice()
101                .when(header(REINDEXING_RECIPIENTS).isEqualTo(""))
102                    .transform().simple("No endpoints configured for indexing")
103                    .endChoice()
104                .otherwise()
105                    .log(INFO, LOGGER, "Initial indexing path: ${headers[CamelFcrepoUri]}")
106                    .inOnly("{{reindexing.stream}}?disableTimeToLive=true")
107                    .setHeader(CONTENT_TYPE).constant("text/plain")
108                    .transform().simple("Indexing started at ${headers[CamelFcrepoUri]}");
109
110        /**
111         *  A route that traverses through a fedora heirarchy
112         *  indexing nodes, as appropriate.
113         */
114        from("{{reindexing.stream}}?asyncConsumer=true").routeId("FcrepoReindexingTraverse")
115            .inOnly("direct:recipients")
116            .removeHeaders("CamelHttp*")
117            .setHeader(HTTP_METHOD).constant(GET)
118            .to("fcrepo:{{fcrepo.baseUrl}}?preferInclude=PreferContainment" +
119                    "&preferOmit=ServerManaged&accept=application/n-triples")
120            // split the n-triples stream on line breaks so that each triple is split into a separate message
121            .split(body().tokenize("\\n")).streaming()
122                .removeHeader(FCREPO_URI)
123                .removeHeader("JMSCorrelationID")
124                .process(exchange -> {
125                    // This is a simple n-triples parser, spliting nodes on whitespace according to
126                    // https://www.w3.org/TR/n-triples/#n-triples-grammar
127                    // If the body is not null and the predicate is ldp:contains and the object is a URI,
128                    // then set the CamelFcrepoUri header (if that header is not set, the processing stops
129                    // at the filter() line below.
130                    final String body = exchange.getIn().getBody(String.class);
131                    if (body != null) {
132                        final String parts[] = body.split("\\s+");
133                        if (parts.length > 2 && parts[1].equals(LDP_CONTAINS) && parts[2].startsWith("<")) {
134                            exchange.getIn().setHeader(FCREPO_URI, parts[2].substring(1, parts[2].length() - 1));
135                        }
136                        exchange.getIn().setBody(null);
137                    }
138                })
139                .filter(header(FCREPO_URI).isNotNull())
140                    .inOnly("{{reindexing.stream}}?disableTimeToLive=true");
141
142        /**
143         *  Send the message to all of the pre-determined endpoints
144         */
145        from("direct:recipients").routeId("FcrepoReindexingRecipients")
146            .recipientList(header(REINDEXING_RECIPIENTS))
147            .ignoreInvalidEndpoints();
148    }
149}