001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.search.impl;
019
020import org.fcrepo.common.db.DbPlatform;
021import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
022import org.fcrepo.kernel.api.identifiers.FedoraId;
023import org.fcrepo.kernel.api.models.ResourceFactory;
024import org.fcrepo.kernel.api.models.ResourceHeaders;
025import org.fcrepo.search.api.Condition;
026import org.fcrepo.search.api.InvalidQueryException;
027import org.fcrepo.search.api.PaginationInfo;
028import org.fcrepo.search.api.SearchIndex;
029import org.fcrepo.search.api.SearchParameters;
030import org.fcrepo.search.api.SearchResult;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033import org.springframework.core.io.DefaultResourceLoader;
034import org.springframework.dao.DataAccessException;
035import org.springframework.jdbc.core.RowMapper;
036import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
037import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
038import org.springframework.jdbc.core.simple.SimpleJdbcInsert;
039import org.springframework.jdbc.datasource.init.DatabasePopulatorUtils;
040import org.springframework.jdbc.datasource.init.ResourceDatabasePopulator;
041import org.springframework.stereotype.Component;
042import org.springframework.transaction.annotation.Transactional;
043
044import javax.annotation.PostConstruct;
045import javax.inject.Inject;
046import javax.sql.DataSource;
047import java.net.URI;
048import java.sql.ResultSet;
049import java.sql.SQLException;
050import java.sql.Timestamp;
051import java.sql.Types;
052import java.time.Instant;
053import java.util.ArrayList;
054import java.util.HashMap;
055import java.util.HashSet;
056import java.util.List;
057import java.util.Map;
058import java.util.Set;
059import java.util.stream.Collectors;
060
061import static java.time.format.DateTimeFormatter.ISO_INSTANT;
062import static org.fcrepo.common.db.DbPlatform.H2;
063import static org.fcrepo.common.db.DbPlatform.MARIADB;
064import static org.fcrepo.common.db.DbPlatform.MYSQL;
065import static org.fcrepo.common.db.DbPlatform.POSTGRESQL;
066import static org.fcrepo.search.api.Condition.Field.CONTENT_SIZE;
067import static org.fcrepo.search.api.Condition.Field.FEDORA_ID;
068import static org.fcrepo.search.api.Condition.Field.MIME_TYPE;
069import static org.fcrepo.search.api.Condition.Field.RDF_TYPE;
070
071
072/**
073 * An implementation of the {@link SearchIndex}
074 *
075 * @author dbernstein
076 * @author whikloj
077 */
078@Component("searchIndexImpl")
079public class DbSearchIndexImpl implements SearchIndex {
080    public static final String SELECT_RDF_TYPE_ID = "select id, rdf_type_uri from search_rdf_type where rdf_type_uri " +
081            "in (:rdf_type_uri)";
082    private static final Logger LOGGER = LoggerFactory.getLogger(DbSearchIndexImpl.class);
083    private static final String SIMPLE_SEARCH_TABLE = "simple_search";
084    private static final String DELETE_FROM_INDEX_SQL = "DELETE FROM simple_search WHERE fedora_id = :fedora_id;";
085    private static final String UPDATE_INDEX_SQL =
086            "UPDATE simple_search SET modified = :modified, content_size = :content_size, mime_type =:mime_type " +
087                    "WHERE fedora_id = :fedora_id;";
088    private static final String SELECT_BY_FEDORA_ID =
089            "SELECT id FROM simple_search WHERE fedora_id = :fedora_id";
090    private static final String FEDORA_ID_PARAM = "fedora_id";
091    private static final String MODIFIED_PARAM = "modified";
092    private static final String CONTENT_SIZE_PARAM = "content_size";
093    private static final String MIME_TYPE_PARAM = "mime_type";
094    private static final String CREATED_PARAM = "created";
095    private static final String DELETE_RDF_TYPE_ASSOCIATIONS =
096            "DELETE FROM search_resource_rdf_type where resource_id = :resource_id";
097    private static final String RDF_TYPE_TABLE = ", (SELECT rrt.resource_id,  group_concat_function as rdf_type " +
098            "from search_resource_rdf_type rrt, " +
099            "search_rdf_type rt  WHERE rrt.rdf_type_id = rt.id group by rrt.resource_id) r, " +
100            "(SELECT rrt.resource_id from search_resource_rdf_type rrt, search_rdf_type rt " +
101            "WHERE rt.rdf_type_uri like :rdf_type_uri and rrt.rdf_type_id = rt.id group by rrt.resource_id) r_filter";
102    private static final String DEFAULT_DDL = "sql/default-search-index.sql";
103
104    private static final Map<DbPlatform, String> DDL_MAP = Map.of(
105            MYSQL, DEFAULT_DDL,
106            H2, DEFAULT_DDL,
107            POSTGRESQL, "sql/postgresql-search-index.sql",
108            MARIADB, DEFAULT_DDL
109    );
110    public static final String SEARCH_RESOURCE_RDF_TYPE_TABLE = "search_resource_rdf_type";
111    public static final String RESOURCE_ID_PARAM = "resource_id";
112    public static final String RDF_TYPE_ID_PARAM = "rdf_type_id";
113    public static final String RDF_TYPE_URI_PARAM = "rdf_type_uri";
114    public static final String SEARCH_RDF_TYPE_TABLE = "search_rdf_type";
115    public static final String ID_COLUMN = "id";
116    private static final String GROUP_CONCAT_FUNCTION = "group_concat_function";
117    private static final String POSTGRES_GROUP_CONCAT_FUNCTION = "STRING_AGG(rt.rdf_type_uri, ',')";
118    private static final String DEFAULT_GROUP_CONCAT_FUNCTION = "GROUP_CONCAT(distinct rt.rdf_type_uri " +
119            "ORDER BY rt.rdf_type_uri ASC SEPARATOR ',')";
120
121    /*
122     * Insert an association between a RDF type and a resource.
123     */
124    private static final String INSERT_RDF_TYPE_ASSOC = "INSERT INTO " + SEARCH_RESOURCE_RDF_TYPE_TABLE + " (" +
125            RESOURCE_ID_PARAM + ", " + RDF_TYPE_ID_PARAM + ") VALUES (:resource_id, :rdf_type_id)";
126
127    /*
128     * Insert a new RDF type into the RDF type table.
129     */
130    private static final String INSERT_RDF_TYPE_MYSQLMARIA = "INSERT IGNORE INTO " + SEARCH_RDF_TYPE_TABLE + " (" +
131            RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)";
132
133    private static final String INSERT_RDF_TYPE_POSTGRES = "INSERT INTO " + SEARCH_RDF_TYPE_TABLE + " (" +
134            RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri) ON CONFLICT DO NOTHING";
135
136    private static final String INSERT_RDF_TYPE_H2 = "MERGE INTO " + SEARCH_RDF_TYPE_TABLE + " (" +
137            RDF_TYPE_URI_PARAM + ") KEY (" + RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)";
138
139    private static final Map<DbPlatform, String> INSERT_RDF_TYPE = Map.of(
140            MYSQL, INSERT_RDF_TYPE_MYSQLMARIA,
141            MARIADB, INSERT_RDF_TYPE_MYSQLMARIA,
142            POSTGRESQL, INSERT_RDF_TYPE_POSTGRES,
143            H2, INSERT_RDF_TYPE_H2
144    );
145
146    @Inject
147    private DataSource dataSource;
148
149    private NamedParameterJdbcTemplate jdbcTemplate;
150
151    private SimpleJdbcInsert jdbcInsertResource;
152
153    @Inject
154    private ResourceFactory resourceFactory;
155
156    private DbPlatform dbPlatForm;
157
158    private String rdfTables;
159
160    private static final RowMapper<RdfType> RDF_TYPE_ROW_MAPPER = (rs, rowNum) ->
161            new RdfType(rs.getLong("id"), rs.getString("rdf_type_uri"));
162
163    /**
164     * Setup database table and connection
165     */
166    @PostConstruct
167    public void setup() {
168        this.dbPlatForm = DbPlatform.fromDataSource(this.dataSource);
169        final var ddl = lookupDdl();
170        LOGGER.debug("Applying ddl: {}", ddl);
171        DatabasePopulatorUtils.execute(
172                new ResourceDatabasePopulator(new DefaultResourceLoader().getResource("classpath:" + ddl)),
173                this.dataSource);
174        this.jdbcTemplate = getNamedParameterJdbcTemplate();
175
176        jdbcInsertResource = new SimpleJdbcInsert(this.jdbcTemplate.getJdbcTemplate())
177                .withTableName(SIMPLE_SEARCH_TABLE)
178                .usingGeneratedKeyColumns(ID_COLUMN);
179
180        this.rdfTables = RDF_TYPE_TABLE.replace(GROUP_CONCAT_FUNCTION,
181                isPostgres() ? POSTGRES_GROUP_CONCAT_FUNCTION : DEFAULT_GROUP_CONCAT_FUNCTION);
182    }
183
184    private String lookupDdl() {
185        return DDL_MAP.get(dbPlatForm);
186    }
187
188    private NamedParameterJdbcTemplate getNamedParameterJdbcTemplate() {
189        return new NamedParameterJdbcTemplate(this.dataSource);
190    }
191
192    @Override
193    public SearchResult doSearch(final SearchParameters parameters) throws InvalidQueryException {
194        //translate parameters into a SQL query
195        final MapSqlParameterSource parameterSource = new MapSqlParameterSource();
196        final var whereClauses = new ArrayList<String>();
197        final var conditions = parameters.getConditions();
198        for (int i = 0; i < conditions.size(); i++) {
199            addWhereClause(i, parameterSource, whereClauses, conditions.get(i));
200        }
201
202        final var fields = parameters.getFields().stream().map(Condition.Field::toString).collect(Collectors.toList());
203        final boolean containsRDFTypeField = fields.contains(RDF_TYPE.toString());
204        if (containsRDFTypeField) {
205            whereClauses.add("s.id = r.resource_id");
206            whereClauses.add("r.resource_id = r_filter.resource_id");
207        }
208
209        final var sql =
210                new StringBuilder("SELECT " + String.join(",", fields) + " FROM " + SIMPLE_SEARCH_TABLE + " s");
211
212        if (containsRDFTypeField) {
213            sql.append(rdfTables);
214            var rdfTypeUriParamValue = "*";
215            for (final Condition condition: conditions) {
216                if (condition.getField().equals(RDF_TYPE)) {
217                    rdfTypeUriParamValue = condition.getObject();
218                    break;
219                }
220            }
221            parameterSource.addValue(RDF_TYPE_URI_PARAM, convertToSqlLikeWildcard(rdfTypeUriParamValue));
222        }
223
224        if (!whereClauses.isEmpty()) {
225            sql.append(" WHERE ");
226            for (final var it = whereClauses.iterator(); it.hasNext(); ) {
227                sql.append(it.next());
228                if (it.hasNext()) {
229                    sql.append(" AND ");
230                }
231            }
232        }
233        sql.append(" ORDER BY " + parameters.getOrderBy() + " " + parameters.getOrder());
234        sql.append(" LIMIT :limit OFFSET :offset");
235
236        parameterSource.addValue("limit", parameters.getMaxResults());
237        parameterSource.addValue("offset", parameters.getOffset());
238
239        final var rowMapper = new RowMapper<Map<String, Object>>() {
240            @Override
241            public Map<String, Object> mapRow(final ResultSet rs, final int rowNum) throws SQLException {
242                final Map<String, Object> map = new HashMap<>();
243                for (final String f : fields) {
244                    final var fieldStr = f.toString();
245                    var value = rs.getObject(fieldStr);
246                    if (value instanceof Timestamp) {
247                        //format as iso instant if timestamp
248                        value = ISO_INSTANT.format(Instant.ofEpochMilli(((Timestamp) value).getTime()));
249                    } else if (f.equals(RDF_TYPE.toString())) {
250                        //convert the comma-separate string to an array for rdf_type
251                        value = value.toString().split(",");
252                    }
253                    map.put(fieldStr, value);
254                }
255                return map;
256            }
257        };
258
259        final List<Map<String, Object>> items = jdbcTemplate.query(sql.toString(), parameterSource, rowMapper);
260        final var pagination = new PaginationInfo(parameters.getMaxResults(), parameters.getOffset());
261        LOGGER.debug("Search query with parameters: {} - {}", sql, parameters);
262        return new SearchResult(items, pagination);
263    }
264
265    private void addWhereClause(final int paramCount, final MapSqlParameterSource parameterSource,
266                                final List<String> whereClauses,
267                                final Condition condition) throws InvalidQueryException {
268        final var field = condition.getField();
269        final var operation = condition.getOperator();
270        var object = condition.getObject();
271        final var paramName = "param" + paramCount;
272        if ((field.equals(FEDORA_ID) || field.equals(MIME_TYPE)) &&
273                condition.getOperator().equals(Condition.Operator.EQ)) {
274            if (!object.equals("*")) {
275                final String whereClause;
276                if (object.contains("*")) {
277                    object = convertToSqlLikeWildcard(object);
278                    whereClause = field + " like :" + paramName;
279                } else {
280                    whereClause = field + " = :" + paramName;
281                }
282
283                whereClauses.add("s." +  whereClause);
284                parameterSource.addValue(paramName, object);
285            }
286        } else if (field.equals(Condition.Field.CREATED) || field.equals(Condition.Field.MODIFIED)) {
287            //parse date
288            try {
289                final var instant = InstantParser.parse(object);
290                whereClauses.add("s." + field + " " + operation.getStringValue() + " :" + paramName);
291                parameterSource.addValue(paramName, new Timestamp(instant.toEpochMilli()), Types.TIMESTAMP);
292            } catch (final Exception ex) {
293                throw new InvalidQueryException(ex.getMessage());
294            }
295        } else if (field.equals(CONTENT_SIZE)) {
296            try {
297                whereClauses.add(field + " " + operation.getStringValue() +
298                        " :" + paramName);
299                parameterSource.addValue(paramName, Long.parseLong(object), Types.INTEGER);
300            } catch (final Exception ex) {
301                throw new InvalidQueryException(ex.getMessage());
302            }
303        } else if (field.equals(RDF_TYPE) && condition.getOperator().equals(Condition.Operator.EQ) ) {
304           //allowed but no where clause added here.
305        } else {
306            throw new InvalidQueryException("Condition not supported: \"" + condition + "\"");
307        }
308    }
309
310    private String convertToSqlLikeWildcard(final String value) {
311        return value.replace("*", "%");
312    }
313
314    @Override
315    public void addUpdateIndex(final ResourceHeaders resourceHeaders) {
316        addUpdateIndex(null, resourceHeaders);
317    }
318
319    @Transactional
320    @Override
321    public void addUpdateIndex(final String txId, final ResourceHeaders resourceHeaders) {
322        final var fedoraId = resourceHeaders.getId();
323        final var fullId = fedoraId.getFullId();
324
325        if (fedoraId.isAcl() || fedoraId.isMemento()) {
326            LOGGER.debug("The search index does not include acls or mementos. Ignoring resource {}", fullId);
327            return;
328        }
329
330        final var selectParams = new MapSqlParameterSource();
331        selectParams.addValue(FEDORA_ID_PARAM, fullId);
332        final var result =
333                jdbcTemplate.queryForList(SELECT_BY_FEDORA_ID,
334                        selectParams);
335        try {
336            final var fedoraResource = resourceFactory.getResource(txId, fedoraId);
337            final var rdfTypes = fedoraResource.getTypes();
338            final var rdfTypeIds = findOrCreateRdfTypesInDb(rdfTypes);
339            final var params = new MapSqlParameterSource();
340            params.addValue(FEDORA_ID_PARAM, fullId);
341            params.addValue(MODIFIED_PARAM, new Timestamp(resourceHeaders.getLastModifiedDate().toEpochMilli()));
342            params.addValue(MIME_TYPE_PARAM, resourceHeaders.getMimeType());
343            params.addValue(CONTENT_SIZE_PARAM, resourceHeaders.getContentSize());
344            final var exists = result.size() > 0;
345            final Long resourcePrimaryKey;
346            if (exists) {
347                resourcePrimaryKey = (Long) result.get(0).get(ID_COLUMN);
348                jdbcTemplate.update(UPDATE_INDEX_SQL, params);
349                //delete rdf_type associations
350                deleteRdfTypeAssociations(resourcePrimaryKey);
351            } else {
352                params.addValue(CREATED_PARAM, new Timestamp(resourceHeaders.getCreatedDate().toEpochMilli()));
353                resourcePrimaryKey = jdbcInsertResource.executeAndReturnKey(params).longValue();
354            }
355            insertRdfTypeAssociations(rdfTypeIds, resourcePrimaryKey);
356        } catch (final Exception e) {
357            throw new RepositoryRuntimeException("Failed add/updated the search index for : " + fullId, e);
358        }
359    }
360
361    private void insertRdfTypeAssociations(final List<Long> rdfTypeIds, final Long resourceId) {
362        //add rdf type associations
363        final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>();
364        for (final var rdfTypeId : rdfTypeIds) {
365            final var assocParams = new MapSqlParameterSource();
366            assocParams.addValue(RESOURCE_ID_PARAM, resourceId);
367            assocParams.addValue(RDF_TYPE_ID_PARAM, rdfTypeId);
368            parameterSourcesList.add(assocParams);
369        }
370        final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]);
371        jdbcTemplate.batchUpdate(INSERT_RDF_TYPE_ASSOC, psArray);
372    }
373
374    private void deleteRdfTypeAssociations(final Long resourceId) {
375        final var deleteParams = new MapSqlParameterSource();
376        deleteParams.addValue(RESOURCE_ID_PARAM, resourceId);
377        jdbcTemplate.update(DELETE_RDF_TYPE_ASSOCIATIONS,
378                deleteParams);
379    }
380
381    private List<Long> findOrCreateRdfTypesInDb(final List<URI> rdfTypes) {
382        final List<String> rdfTypes_str = rdfTypes.stream().map(URI::toString).collect(Collectors.toList());
383
384        final List<RdfType> results = jdbcTemplate.query(SELECT_RDF_TYPE_ID,
385                Map.of(RDF_TYPE_URI_PARAM, rdfTypes_str), RDF_TYPE_ROW_MAPPER);
386        // List of existing type ids.
387        final List<Long> rdfTypeIds = new ArrayList<>();
388        // List of existing type uris.
389        final Set<String> rdfTypeUris = new HashSet<>();
390        for (final RdfType type : results) {
391            rdfTypeIds.add(type.getTypeId());
392            rdfTypeUris.add(type.getTypeUri());
393        }
394        // Type uris that don't already have a record. Needs to be a set to avoid inserting the same URI and
395        final var missingUris = rdfTypes_str.stream().filter(t -> !rdfTypeUris.contains(t))
396                .collect(Collectors.toSet());
397
398        if (!missingUris.isEmpty()) {
399            final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>();
400            for (final var uri : missingUris) {
401                LOGGER.debug("Adding rdf type uri: " + uri);
402                final var ps = new MapSqlParameterSource();
403                ps.addValue(RDF_TYPE_URI_PARAM, uri);
404                parameterSourcesList.add(ps);
405            }
406            // Batch insert all the records.
407            final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]);
408            jdbcTemplate.batchUpdate(INSERT_RDF_TYPE.get(this.dbPlatForm), psArray);
409            // Do a single query for the ID to all the URIs we just inserted.
410            final List<RdfType> createdIds = jdbcTemplate.query(SELECT_RDF_TYPE_ID,
411                    Map.of(RDF_TYPE_URI_PARAM, missingUris), RDF_TYPE_ROW_MAPPER);
412            if (createdIds.size() != missingUris.size()) {
413                throw new RepositoryRuntimeException("Did not select all the items we inserted into the table");
414            }
415            rdfTypeIds.addAll(createdIds.stream().map(RdfType::getTypeId).collect(Collectors.toList()));
416        }
417        return rdfTypeIds;
418    }
419
420    /**
421     * Simple class to map rdf types.
422     */
423    private static class RdfType {
424        private String typeUri;
425        private Long typeId;
426
427        public RdfType(final Long id, final String uri) {
428            typeId = id;
429            typeUri = uri;
430        }
431
432        public Long getTypeId() {
433            return typeId;
434        }
435
436        public String getTypeUri() {
437            return typeUri;
438        }
439    }
440
441    @Override
442    public void removeFromIndex(final FedoraId fedoraId) {
443        try {
444            final var params = new MapSqlParameterSource();
445            params.addValue(FEDORA_ID_PARAM, fedoraId.getFullId());
446            jdbcTemplate.update(DELETE_FROM_INDEX_SQL, params);
447        } catch (final DataAccessException ex) {
448            throw new RepositoryRuntimeException("Failed to delete search index entry for " + fedoraId.getFullId());
449        }
450    }
451
452    @Override
453    public void reset() {
454        try (final var conn = this.dataSource.getConnection()) {
455            final var statement = conn.createStatement();
456            for (final var sql : toggleForeignKeyChecks(false)) {
457                statement.addBatch(sql);
458            }
459            statement.addBatch(truncateTable(SEARCH_RESOURCE_RDF_TYPE_TABLE));
460            statement.addBatch(truncateTable(SIMPLE_SEARCH_TABLE));
461            statement.addBatch(truncateTable(SEARCH_RDF_TYPE_TABLE));
462            for (final var sql : toggleForeignKeyChecks(true)) {
463                statement.addBatch(sql);
464            }
465            statement.executeBatch();
466        } catch (final SQLException e) {
467            throw new RepositoryRuntimeException("Failed to truncate search index tables", e);
468        }
469    }
470
471    private List<String> toggleForeignKeyChecks(final boolean enable) {
472
473        if (isPostgres()) {
474            return List.of(
475                    togglePostgresTriggers(SEARCH_RESOURCE_RDF_TYPE_TABLE, enable),
476                    togglePostgresTriggers(SEARCH_RDF_TYPE_TABLE, enable),
477                    togglePostgresTriggers(SIMPLE_SEARCH_TABLE, enable)
478            );
479        } else {
480            return List.of("SET FOREIGN_KEY_CHECKS = " + (enable ? 1 : 0) + ";");
481        }
482    }
483
484    private boolean isPostgres() {
485        return dbPlatForm.equals(POSTGRESQL);
486    }
487
488    private String togglePostgresTriggers(final String tableName, final boolean enable) {
489        return "ALTER TABLE " + tableName + " " +
490                (enable ? "ENABLE" : "DISABLE") + " TRIGGER ALL;";
491    }
492
493    private String truncateTable(final String tableName) {
494        final var addCascade = isPostgres();
495        return "TRUNCATE TABLE " + tableName + (addCascade ? " CASCADE" : "") + ";";
496    }
497
498}