001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.search.impl;
019
020import org.fcrepo.common.db.DbPlatform;
021import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
022import org.fcrepo.kernel.api.identifiers.FedoraId;
023import org.fcrepo.kernel.api.models.ResourceFactory;
024import org.fcrepo.kernel.api.models.ResourceHeaders;
025import org.fcrepo.search.api.Condition;
026import org.fcrepo.search.api.InvalidQueryException;
027import org.fcrepo.search.api.PaginationInfo;
028import org.fcrepo.search.api.SearchIndex;
029import org.fcrepo.search.api.SearchParameters;
030import org.fcrepo.search.api.SearchResult;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033import org.springframework.core.io.DefaultResourceLoader;
034import org.springframework.dao.DataAccessException;
035import org.springframework.jdbc.core.RowMapper;
036import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
037import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
038import org.springframework.jdbc.core.simple.SimpleJdbcInsert;
039import org.springframework.jdbc.datasource.init.DatabasePopulatorUtils;
040import org.springframework.jdbc.datasource.init.ResourceDatabasePopulator;
041import org.springframework.stereotype.Component;
042import org.springframework.transaction.annotation.Transactional;
043
044import javax.annotation.PostConstruct;
045import javax.inject.Inject;
046import javax.sql.DataSource;
047import java.net.URI;
048import java.sql.ResultSet;
049import java.sql.SQLException;
050import java.sql.Timestamp;
051import java.sql.Types;
052import java.time.Instant;
053import java.util.ArrayList;
054import java.util.HashMap;
055import java.util.HashSet;
056import java.util.List;
057import java.util.Map;
058import java.util.Set;
059import java.util.stream.Collectors;
060
061import static java.time.format.DateTimeFormatter.ISO_INSTANT;
062import static java.util.Collections.EMPTY_LIST;
063import static org.fcrepo.common.db.DbPlatform.H2;
064import static org.fcrepo.common.db.DbPlatform.MARIADB;
065import static org.fcrepo.common.db.DbPlatform.MYSQL;
066import static org.fcrepo.common.db.DbPlatform.POSTGRESQL;
067import static org.fcrepo.search.api.Condition.Field.CONTENT_SIZE;
068import static org.fcrepo.search.api.Condition.Field.FEDORA_ID;
069import static org.fcrepo.search.api.Condition.Field.MIME_TYPE;
070import static org.fcrepo.search.api.Condition.Field.RDF_TYPE;
071
072
073/**
074 * An implementation of the {@link SearchIndex}
075 *
076 * @author dbernstein
077 * @author whikloj
078 */
079@Component("searchIndexImpl")
080public class DbSearchIndexImpl implements SearchIndex {
    /* Selects the id and URI of every known rdf type whose URI is in the supplied collection. */
    public static final String SELECT_RDF_TYPE_ID = "select id, rdf_type_uri from search_rdf_type where rdf_type_uri " +
            "in (:rdf_type_uri)";
    private static final Logger LOGGER = LoggerFactory.getLogger(DbSearchIndexImpl.class);
    /* Name of the table holding one row per indexed resource. */
    private static final String SIMPLE_SEARCH_TABLE = "simple_search";
    /* Removes the index row of a single resource, looked up by its full Fedora id. */
    private static final String DELETE_FROM_INDEX_SQL = "DELETE FROM simple_search WHERE fedora_id = :fedora_id;";
    /* Refreshes the mutable columns of an existing index row; the created column is left untouched. */
    private static final String UPDATE_INDEX_SQL =
            "UPDATE simple_search SET modified = :modified, content_size = :content_size, mime_type =:mime_type " +
                    "WHERE fedora_id = :fedora_id;";
    /* Looks up the primary key of the index row for a Fedora id; used to decide between insert and update. */
    private static final String SELECT_BY_FEDORA_ID =
            "SELECT id FROM simple_search WHERE fedora_id = :fedora_id";
    /* Named-parameter / column names used with the simple_search table. */
    private static final String FEDORA_ID_PARAM = "fedora_id";
    private static final String MODIFIED_PARAM = "modified";
    private static final String CONTENT_SIZE_PARAM = "content_size";
    private static final String MIME_TYPE_PARAM = "mime_type";
    private static final String CREATED_PARAM = "created";
    /* Removes every rdf type association of one resource (by the resource's primary key). */
    private static final String DELETE_RDF_TYPE_ASSOCIATIONS =
            "DELETE FROM search_resource_rdf_type where resource_id = :resource_id";
    /*
     * FROM-clause fragment appended after "simple_search s" when rdf types take part in a search. It adds two
     * derived tables: "r" aggregates the type URIs of each resource into a single rdf_type column (the literal
     * "group_concat_function" is a placeholder that setup() replaces with the platform's aggregate function) and
     * "r_filter" lists the resources having at least one type URI that is LIKE :rdf_type_uri.
     */
    private static final String RDF_TYPE_TABLE = ", (SELECT rrt.resource_id,  group_concat_function as rdf_type " +
            "from search_resource_rdf_type rrt, " +
            "search_rdf_type rt  WHERE rrt.rdf_type_id = rt.id group by rrt.resource_id) r, " +
            "(SELECT rrt.resource_id from search_resource_rdf_type rrt, search_rdf_type rt " +
            "WHERE rt.rdf_type_uri like :rdf_type_uri and rrt.rdf_type_id = rt.id group by rrt.resource_id) r_filter";
    /* Classpath location of the DDL script shared by MySQL, MariaDB and H2. */
    private static final String DEFAULT_DDL = "sql/default-search-index.sql";

    /* DDL script to apply for each supported database platform; only PostgreSQL has its own script. */
    private static final Map<DbPlatform, String> DDL_MAP = Map.of(
            MYSQL, DEFAULT_DDL,
            H2, DEFAULT_DDL,
            POSTGRESQL, "sql/postgresql-search-index.sql",
            MARIADB, DEFAULT_DDL
    );
    /* Table and column/parameter names for rdf types and their association with resources. */
    public static final String SEARCH_RESOURCE_RDF_TYPE_TABLE = "search_resource_rdf_type";
    public static final String RESOURCE_ID_PARAM = "resource_id";
    public static final String RDF_TYPE_ID_PARAM = "rdf_type_id";
    public static final String RDF_TYPE_URI_PARAM = "rdf_type_uri";
    public static final String SEARCH_RDF_TYPE_TABLE = "search_rdf_type";
    public static final String ID_COLUMN = "id";
    /* Placeholder token inside RDF_TYPE_TABLE and the two expressions that may replace it. */
    private static final String GROUP_CONCAT_FUNCTION = "group_concat_function";
    private static final String POSTGRES_GROUP_CONCAT_FUNCTION = "STRING_AGG(rt.rdf_type_uri, ',')";
    private static final String DEFAULT_GROUP_CONCAT_FUNCTION = "GROUP_CONCAT(distinct rt.rdf_type_uri " +
            "ORDER BY rt.rdf_type_uri ASC SEPARATOR ',')";
121
    /*
     * Insert an association between an RDF type and a resource.
     */
    private static final String INSERT_RDF_TYPE_ASSOC = "INSERT INTO " + SEARCH_RESOURCE_RDF_TYPE_TABLE + " (" +
            RESOURCE_ID_PARAM + ", " + RDF_TYPE_ID_PARAM + ") VALUES (:resource_id, :rdf_type_id)";

    /*
     * Insert a new RDF type into the RDF type table. Each platform gets its own statement; all three variants
     * (INSERT IGNORE, ON CONFLICT DO NOTHING, MERGE ... KEY) are written so that inserting an already-present URI
     * does not fail. NOTE(review): presumably this relies on a unique constraint on rdf_type_uri defined in the
     * DDL scripts, which are not visible here - confirm.
     */
    private static final String INSERT_RDF_TYPE_MYSQLMARIA = "INSERT IGNORE INTO " + SEARCH_RDF_TYPE_TABLE + " (" +
            RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)";

    private static final String INSERT_RDF_TYPE_POSTGRES = "INSERT INTO " + SEARCH_RDF_TYPE_TABLE + " (" +
            RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri) ON CONFLICT DO NOTHING";

    private static final String INSERT_RDF_TYPE_H2 = "MERGE INTO " + SEARCH_RDF_TYPE_TABLE + " (" +
            RDF_TYPE_URI_PARAM + ") KEY (" + RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)";

    /* RDF type insert statement to use for each supported database platform. */
    private static final Map<DbPlatform, String> INSERT_RDF_TYPE = Map.of(
            MYSQL, INSERT_RDF_TYPE_MYSQLMARIA,
            MARIADB, INSERT_RDF_TYPE_MYSQLMARIA,
            POSTGRESQL, INSERT_RDF_TYPE_POSTGRES,
            H2, INSERT_RDF_TYPE_H2
    );
146
    /* Injected data source; used for platform detection, DDL execution and every index query. */
    @Inject
    private DataSource dataSource;

    /* Named-parameter template over the data source; created in setup(). */
    private NamedParameterJdbcTemplate jdbcTemplate;

    /* Inserts rows into the simple_search table and returns the generated id; created in setup(). */
    private SimpleJdbcInsert jdbcInsertResource;

    /* Used to load the resource being indexed so that its rdf types can be read. */
    @Inject
    private ResourceFactory resourceFactory;

    /* Database platform detected from the data source in setup(). */
    private DbPlatform dbPlatForm;

    /* RDF_TYPE_TABLE with the platform-specific aggregate function substituted; set in setup(). */
    private String rdfTables;

    /* Maps a result row with the columns "id" and "rdf_type_uri" to an RdfType. */
    private static final RowMapper<RdfType> RDF_TYPE_ROW_MAPPER = (rs, rowNum) ->
            new RdfType(rs.getLong("id"), rs.getString("rdf_type_uri"));
163
164    /**
165     * Setup database table and connection
166     */
167    @PostConstruct
168    public void setup() {
169        this.dbPlatForm = DbPlatform.fromDataSource(this.dataSource);
170        final var ddl = lookupDdl();
171        LOGGER.debug("Applying ddl: {}", ddl);
172        DatabasePopulatorUtils.execute(
173                new ResourceDatabasePopulator(new DefaultResourceLoader().getResource("classpath:" + ddl)),
174                this.dataSource);
175        this.jdbcTemplate = getNamedParameterJdbcTemplate();
176
177        jdbcInsertResource = new SimpleJdbcInsert(this.jdbcTemplate.getJdbcTemplate())
178                .withTableName(SIMPLE_SEARCH_TABLE)
179                .usingGeneratedKeyColumns(ID_COLUMN);
180
181        this.rdfTables = RDF_TYPE_TABLE.replace(GROUP_CONCAT_FUNCTION,
182                isPostgres() ? POSTGRES_GROUP_CONCAT_FUNCTION : DEFAULT_GROUP_CONCAT_FUNCTION);
183    }
184
185    private String lookupDdl() {
186        return DDL_MAP.get(dbPlatForm);
187    }
188
189    private NamedParameterJdbcTemplate getNamedParameterJdbcTemplate() {
190        return new NamedParameterJdbcTemplate(this.dataSource);
191    }
192
193    @Override
194    public SearchResult doSearch(final SearchParameters parameters) throws InvalidQueryException {
195        //translate parameters into a SQL query
196        final MapSqlParameterSource parameterSource = new MapSqlParameterSource();
197        final var whereClauses = new ArrayList<String>();
198        final var conditions = parameters.getConditions();
199        for (int i = 0; i < conditions.size(); i++) {
200            addWhereClause(i, parameterSource, whereClauses, conditions.get(i));
201        }
202
203        final var fields = parameters.getFields().stream().map(Condition.Field::toString).collect(Collectors.toList());
204        final boolean containsRDFTypeField = fields.contains(RDF_TYPE.toString());
205        if (containsRDFTypeField) {
206            whereClauses.add("s.id = r.resource_id");
207            whereClauses.add("r.resource_id = r_filter.resource_id");
208        }
209
210        final var sql =
211                new StringBuilder("SELECT " + String.join(",", fields) + " FROM " + SIMPLE_SEARCH_TABLE + " s");
212
213        if (containsRDFTypeField) {
214            sql.append(rdfTables);
215            var rdfTypeUriParamValue = "*";
216            for (final Condition condition: conditions) {
217                if (condition.getField().equals(RDF_TYPE)) {
218                    rdfTypeUriParamValue = condition.getObject();
219                    break;
220                }
221            }
222            parameterSource.addValue(RDF_TYPE_URI_PARAM, convertToSqlLikeWildcard(rdfTypeUriParamValue));
223        }
224
225        if (!whereClauses.isEmpty()) {
226            sql.append(" WHERE ");
227            for (final var it = whereClauses.iterator(); it.hasNext(); ) {
228                sql.append(it.next());
229                if (it.hasNext()) {
230                    sql.append(" AND ");
231                }
232            }
233        }
234        sql.append(" ORDER BY " + parameters.getOrderBy() + " " + parameters.getOrder());
235        sql.append(" LIMIT :limit OFFSET :offset");
236
237        parameterSource.addValue("limit", parameters.getMaxResults());
238        parameterSource.addValue("offset", parameters.getOffset());
239
240        final var rowMapper = new RowMapper<Map<String, Object>>() {
241            @Override
242            public Map<String, Object> mapRow(final ResultSet rs, final int rowNum) throws SQLException {
243                final Map<String, Object> map = new HashMap<>();
244                for (final String f : fields) {
245                    final var fieldStr = f.toString();
246                    var value = rs.getObject(fieldStr);
247                    if (value instanceof Timestamp) {
248                        //format as iso instant if timestamp
249                        value = ISO_INSTANT.format(Instant.ofEpochMilli(((Timestamp) value).getTime()));
250                    } else if (f.equals(RDF_TYPE.toString())) {
251                        //convert the comma-separate string to an array for rdf_type
252                        value = value.toString().split(",");
253                    }
254                    map.put(fieldStr, value);
255                }
256                return map;
257            }
258        };
259
260        final List<Map<String, Object>> items = jdbcTemplate.query(sql.toString(), parameterSource, rowMapper);
261        final var pagination = new PaginationInfo(parameters.getMaxResults(), parameters.getOffset());
262        LOGGER.debug("Search query with parameters: {} - {}", sql, parameters);
263        return new SearchResult(items, pagination);
264    }
265
266    private void addWhereClause(final int paramCount, final MapSqlParameterSource parameterSource,
267                                final List<String> whereClauses,
268                                final Condition condition) throws InvalidQueryException {
269        final var field = condition.getField();
270        final var operation = condition.getOperator();
271        var object = condition.getObject();
272        final var paramName = "param" + paramCount;
273        if ((field.equals(FEDORA_ID) || field.equals(MIME_TYPE)) &&
274                condition.getOperator().equals(Condition.Operator.EQ)) {
275            if (!object.equals("*")) {
276                final String whereClause;
277                if (object.contains("*")) {
278                    object = convertToSqlLikeWildcard(object);
279                    whereClause = field + " like :" + paramName;
280                } else {
281                    whereClause = field + " = :" + paramName;
282                }
283
284                whereClauses.add("s." +  whereClause);
285                parameterSource.addValue(paramName, object);
286            }
287        } else if (field.equals(Condition.Field.CREATED) || field.equals(Condition.Field.MODIFIED)) {
288            //parse date
289            try {
290                final var instant = InstantParser.parse(object);
291                whereClauses.add("s." + field + " " + operation.getStringValue() + " :" + paramName);
292                parameterSource.addValue(paramName, new Timestamp(instant.toEpochMilli()), Types.TIMESTAMP);
293            } catch (final Exception ex) {
294                throw new InvalidQueryException(ex.getMessage());
295            }
296        } else if (field.equals(CONTENT_SIZE)) {
297            try {
298                whereClauses.add(field + " " + operation.getStringValue() +
299                        " :" + paramName);
300                parameterSource.addValue(paramName, Long.parseLong(object), Types.INTEGER);
301            } catch (final Exception ex) {
302                throw new InvalidQueryException(ex.getMessage());
303            }
304        } else if (field.equals(RDF_TYPE) && condition.getOperator().equals(Condition.Operator.EQ) ) {
305           //allowed but no where clause added here.
306        } else {
307            throw new InvalidQueryException("Condition not supported: \"" + condition + "\"");
308        }
309    }
310
311    private String convertToSqlLikeWildcard(final String value) {
312        return value.replace("*", "%");
313    }
314
315    @Override
316    public void addUpdateIndex(final ResourceHeaders resourceHeaders) {
317        addUpdateIndex(null, resourceHeaders);
318    }
319
320    @Transactional
321    @Override
322    public void addUpdateIndex(final String txId, final ResourceHeaders resourceHeaders) {
323        final var fedoraId = resourceHeaders.getId();
324        final var fullId = fedoraId.getFullId();
325
326        if (fedoraId.isAcl() || fedoraId.isMemento()) {
327            LOGGER.debug("The search index does not include acls or mementos. Ignoring resource {}", fullId);
328            return;
329        }
330
331        final var selectParams = new MapSqlParameterSource();
332        selectParams.addValue(FEDORA_ID_PARAM, fullId);
333        final var result =
334                jdbcTemplate.queryForList(SELECT_BY_FEDORA_ID,
335                        selectParams);
336        try {
337            final var fedoraResource = resourceFactory.getResource(txId, fedoraId);
338            final var rdfTypes = fedoraResource.getTypes();
339            final var rdfTypeIds = findOrCreateRdfTypesInDb(rdfTypes);
340            final var params = new MapSqlParameterSource();
341            params.addValue(FEDORA_ID_PARAM, fullId);
342            params.addValue(MODIFIED_PARAM, new Timestamp(resourceHeaders.getLastModifiedDate().toEpochMilli()));
343            params.addValue(MIME_TYPE_PARAM, resourceHeaders.getMimeType());
344            params.addValue(CONTENT_SIZE_PARAM, resourceHeaders.getContentSize());
345            final var exists = result.size() > 0;
346            final Long resourcePrimaryKey;
347            if (exists) {
348                resourcePrimaryKey = (Long) result.get(0).get(ID_COLUMN);
349                jdbcTemplate.update(UPDATE_INDEX_SQL, params);
350                //delete rdf_type associations
351                deleteRdfTypeAssociations(resourcePrimaryKey);
352            } else {
353                params.addValue(CREATED_PARAM, new Timestamp(resourceHeaders.getCreatedDate().toEpochMilli()));
354                resourcePrimaryKey = jdbcInsertResource.executeAndReturnKey(params).longValue();
355            }
356            insertRdfTypeAssociations(rdfTypeIds, resourcePrimaryKey);
357        } catch (final Exception e) {
358            throw new RepositoryRuntimeException("Failed add/updated the search index for : " + fullId, e);
359        }
360    }
361
362    private void insertRdfTypeAssociations(final List<Long> rdfTypeIds, final Long resourceId) {
363        //add rdf type associations
364        final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>();
365        for (final var rdfTypeId : rdfTypeIds) {
366            final var assocParams = new MapSqlParameterSource();
367            assocParams.addValue(RESOURCE_ID_PARAM, resourceId);
368            assocParams.addValue(RDF_TYPE_ID_PARAM, rdfTypeId);
369            parameterSourcesList.add(assocParams);
370        }
371        final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]);
372        jdbcTemplate.batchUpdate(INSERT_RDF_TYPE_ASSOC, psArray);
373    }
374
375    private void deleteRdfTypeAssociations(final Long resourceId) {
376        final var deleteParams = new MapSqlParameterSource();
377        deleteParams.addValue(RESOURCE_ID_PARAM, resourceId);
378        jdbcTemplate.update(DELETE_RDF_TYPE_ASSOCIATIONS,
379                deleteParams);
380    }
381
382    private List<Long> findOrCreateRdfTypesInDb(final List<URI> rdfTypes) {
383        final List<String> rdfTypes_str = rdfTypes.stream().map(URI::toString).collect(Collectors.toList());
384
385        final List<RdfType> results = jdbcTemplate.query(SELECT_RDF_TYPE_ID,
386                Map.of(RDF_TYPE_URI_PARAM, rdfTypes_str), RDF_TYPE_ROW_MAPPER);
387        // List of existing type ids.
388        final List<Long> rdfTypeIds = new ArrayList<>();
389        // List of existing type uris.
390        final Set<String> rdfTypeUris = new HashSet<>();
391        for (final RdfType type : results) {
392            rdfTypeIds.add(type.getTypeId());
393            rdfTypeUris.add(type.getTypeUri());
394        }
395        // Type uris that don't already have a record. Needs to be a set to avoid inserting the same URI and
396        final var missingUris = rdfTypes_str.stream().filter(t -> !rdfTypeUris.contains(t))
397                .collect(Collectors.toSet());
398
399        if (!missingUris.isEmpty()) {
400            final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>();
401            for (final var uri : missingUris) {
402                LOGGER.debug("Adding rdf type uri: " + uri);
403                final var ps = new MapSqlParameterSource();
404                ps.addValue(RDF_TYPE_URI_PARAM, uri);
405                parameterSourcesList.add(ps);
406            }
407            // Batch insert all the records.
408            final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]);
409            jdbcTemplate.batchUpdate(INSERT_RDF_TYPE.get(this.dbPlatForm), psArray);
410            // Do a single query for the ID to all the URIs we just inserted.
411            final List<RdfType> createdIds = jdbcTemplate.query(SELECT_RDF_TYPE_ID,
412                    Map.of(RDF_TYPE_URI_PARAM, missingUris), RDF_TYPE_ROW_MAPPER);
413            if (createdIds.size() != missingUris.size()) {
414                throw new RepositoryRuntimeException("Did not select all the items we inserted into the table");
415            }
416            rdfTypeIds.addAll(createdIds.stream().map(RdfType::getTypeId).collect(Collectors.toList()));
417        }
418        return rdfTypeIds;
419    }
420
421    /**
422     * Simple class to map rdf types.
423     */
424    private static class RdfType {
425        private String typeUri;
426        private Long typeId;
427
428        public RdfType(final Long id, final String uri) {
429            typeId = id;
430            typeUri = uri;
431        }
432
433        public Long getTypeId() {
434            return typeId;
435        }
436
437        public String getTypeUri() {
438            return typeUri;
439        }
440    }
441
442    @Override
443    public void removeFromIndex(final FedoraId fedoraId) {
444        try {
445            final var params = new MapSqlParameterSource();
446            params.addValue(FEDORA_ID_PARAM, fedoraId.getFullId());
447            jdbcTemplate.update(DELETE_FROM_INDEX_SQL, params);
448        } catch (final DataAccessException ex) {
449            throw new RepositoryRuntimeException("Failed to delete search index entry for " + fedoraId.getFullId());
450        }
451    }
452
453    @Override
454    public void reset() {
455        try (final var conn = this.dataSource.getConnection()) {
456            final var statement = conn.createStatement();
457            for (final var sql : toggleForeignKeyChecks(false)) {
458                statement.addBatch(sql);
459            }
460            statement.addBatch(truncateTable(SEARCH_RDF_TYPE_TABLE));
461            statement.addBatch(truncateTable(SEARCH_RESOURCE_RDF_TYPE_TABLE));
462            statement.addBatch(truncateTable(SIMPLE_SEARCH_TABLE));
463            for (final var sql : toggleForeignKeyChecks(true)) {
464                statement.addBatch(sql);
465            }
466            statement.executeBatch();
467        } catch (final SQLException e) {
468            throw new RepositoryRuntimeException("Failed to truncate search index tables", e);
469        }
470    }
471
472    private List<String> toggleForeignKeyChecks(final boolean enable) {
473
474        if (isPostgres()) {
475            return EMPTY_LIST;
476        } else {
477            return List.of("SET FOREIGN_KEY_CHECKS = " + (enable ? 1 : 0) + ";");
478        }
479    }
480
481    private boolean isPostgres() {
482        return dbPlatForm.equals(POSTGRESQL);
483    }
484
485    private String truncateTable(final String tableName) {
486        final var addCascade = isPostgres();
487        return "TRUNCATE TABLE " + tableName + (addCascade ? " CASCADE" : "") + ";";
488    }
489
490}