001/* 002 * Licensed to DuraSpace under one or more contributor license agreements. 003 * See the NOTICE file distributed with this work for additional information 004 * regarding copyright ownership. 005 * 006 * DuraSpace licenses this file to you under the Apache License, 007 * Version 2.0 (the "License"); you may not use this file except in 008 * compliance with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.fcrepo.search.impl; 019 020import org.fcrepo.common.db.DbPlatform; 021import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 022import org.fcrepo.kernel.api.identifiers.FedoraId; 023import org.fcrepo.kernel.api.models.ResourceFactory; 024import org.fcrepo.kernel.api.models.ResourceHeaders; 025import org.fcrepo.search.api.Condition; 026import org.fcrepo.search.api.InvalidQueryException; 027import org.fcrepo.search.api.PaginationInfo; 028import org.fcrepo.search.api.SearchIndex; 029import org.fcrepo.search.api.SearchParameters; 030import org.fcrepo.search.api.SearchResult; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033import org.springframework.core.io.DefaultResourceLoader; 034import org.springframework.dao.DataAccessException; 035import org.springframework.jdbc.core.RowMapper; 036import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; 037import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; 038import org.springframework.jdbc.core.simple.SimpleJdbcInsert; 039import org.springframework.jdbc.datasource.init.DatabasePopulatorUtils; 040import org.springframework.jdbc.datasource.init.ResourceDatabasePopulator; 041import org.springframework.stereotype.Component; 042import org.springframework.transaction.annotation.Transactional; 043 044import javax.annotation.PostConstruct; 045import javax.inject.Inject; 046import javax.sql.DataSource; 047import java.net.URI; 048import java.sql.ResultSet; 049import java.sql.SQLException; 050import java.sql.Timestamp; 051import java.sql.Types; 052import java.time.Instant; 053import java.util.ArrayList; 054import java.util.HashMap; 055import java.util.HashSet; 056import java.util.List; 057import java.util.Map; 058import java.util.Set; 059import java.util.stream.Collectors; 060 061import static java.time.format.DateTimeFormatter.ISO_INSTANT; 062import static org.fcrepo.common.db.DbPlatform.H2; 063import static org.fcrepo.common.db.DbPlatform.MARIADB; 064import static org.fcrepo.common.db.DbPlatform.MYSQL; 065import static org.fcrepo.common.db.DbPlatform.POSTGRESQL; 066import static org.fcrepo.search.api.Condition.Field.CONTENT_SIZE; 067import static org.fcrepo.search.api.Condition.Field.FEDORA_ID; 068import static org.fcrepo.search.api.Condition.Field.MIME_TYPE; 069import static org.fcrepo.search.api.Condition.Field.RDF_TYPE; 070 071 072/** 073 * An implementation of the {@link SearchIndex} 074 * 075 * @author dbernstein 076 * @author whikloj 077 */ 078@Component("searchIndexImpl") 079public class DbSearchIndexImpl implements SearchIndex { 080 public static final String SELECT_RDF_TYPE_ID = "select id, rdf_type_uri from search_rdf_type where rdf_type_uri " + 081 "in (:rdf_type_uri)"; 082 private static final Logger LOGGER = LoggerFactory.getLogger(DbSearchIndexImpl.class); 083 private static final String SIMPLE_SEARCH_TABLE = "simple_search"; 084 private static final String DELETE_FROM_INDEX_SQL = "DELETE FROM simple_search WHERE fedora_id = :fedora_id;"; 085 private static final String UPDATE_INDEX_SQL = 086 "UPDATE simple_search SET modified = :modified, content_size = :content_size, mime_type =:mime_type " + 087 "WHERE fedora_id = :fedora_id;"; 088 private static final String SELECT_BY_FEDORA_ID = 089 "SELECT id FROM simple_search WHERE fedora_id = :fedora_id"; 090 private static final String FEDORA_ID_PARAM = "fedora_id"; 091 private static final String MODIFIED_PARAM = "modified"; 092 private static final String CONTENT_SIZE_PARAM = "content_size"; 093 private static final String MIME_TYPE_PARAM = "mime_type"; 094 private static final String CREATED_PARAM = "created"; 095 private static final String DELETE_RDF_TYPE_ASSOCIATIONS = 096 "DELETE FROM search_resource_rdf_type where resource_id = :resource_id"; 097 private static final String RDF_TYPE_TABLE = ", (SELECT rrt.resource_id, group_concat_function as rdf_type " + 098 "from search_resource_rdf_type rrt, " + 099 "search_rdf_type rt WHERE rrt.rdf_type_id = rt.id group by rrt.resource_id) r, " + 100 "(SELECT rrt.resource_id from search_resource_rdf_type rrt, search_rdf_type rt " + 101 "WHERE rt.rdf_type_uri like :rdf_type_uri and rrt.rdf_type_id = rt.id group by rrt.resource_id) r_filter"; 102 private static final String DEFAULT_DDL = "sql/default-search-index.sql"; 103 104 private static final Map<DbPlatform, String> DDL_MAP = Map.of( 105 MYSQL, DEFAULT_DDL, 106 H2, DEFAULT_DDL, 107 POSTGRESQL, "sql/postgresql-search-index.sql", 108 MARIADB, DEFAULT_DDL 109 ); 110 public static final String SEARCH_RESOURCE_RDF_TYPE_TABLE = "search_resource_rdf_type"; 111 public static final String RESOURCE_ID_PARAM = "resource_id"; 112 public static final String RDF_TYPE_ID_PARAM = "rdf_type_id"; 113 public static final String RDF_TYPE_URI_PARAM = "rdf_type_uri"; 114 public static final String SEARCH_RDF_TYPE_TABLE = "search_rdf_type"; 115 public static final String ID_COLUMN = "id"; 116 private static final String GROUP_CONCAT_FUNCTION = "group_concat_function"; 117 private static final String POSTGRES_GROUP_CONCAT_FUNCTION = "STRING_AGG(rt.rdf_type_uri, ',')"; 118 private static final String DEFAULT_GROUP_CONCAT_FUNCTION = "GROUP_CONCAT(distinct rt.rdf_type_uri " + 119 "ORDER BY rt.rdf_type_uri ASC SEPARATOR ',')"; 120 121 /* 122 * Insert an association between a RDF type and a resource. 123 */ 124 private static final String INSERT_RDF_TYPE_ASSOC = "INSERT INTO " + SEARCH_RESOURCE_RDF_TYPE_TABLE + " (" + 125 RESOURCE_ID_PARAM + ", " + RDF_TYPE_ID_PARAM + ") VALUES (:resource_id, :rdf_type_id)"; 126 127 /* 128 * Insert a new RDF type into the RDF type table. 129 */ 130 private static final String INSERT_RDF_TYPE_MYSQLMARIA = "INSERT IGNORE INTO " + SEARCH_RDF_TYPE_TABLE + " (" + 131 RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)"; 132 133 private static final String INSERT_RDF_TYPE_POSTGRES = "INSERT INTO " + SEARCH_RDF_TYPE_TABLE + " (" + 134 RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri) ON CONFLICT DO NOTHING"; 135 136 private static final String INSERT_RDF_TYPE_H2 = "MERGE INTO " + SEARCH_RDF_TYPE_TABLE + " (" + 137 RDF_TYPE_URI_PARAM + ") KEY (" + RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)"; 138 139 private static final Map<DbPlatform, String> INSERT_RDF_TYPE = Map.of( 140 MYSQL, INSERT_RDF_TYPE_MYSQLMARIA, 141 MARIADB, INSERT_RDF_TYPE_MYSQLMARIA, 142 POSTGRESQL, INSERT_RDF_TYPE_POSTGRES, 143 H2, INSERT_RDF_TYPE_H2 144 ); 145 146 @Inject 147 private DataSource dataSource; 148 149 private NamedParameterJdbcTemplate jdbcTemplate; 150 151 private SimpleJdbcInsert jdbcInsertResource; 152 153 @Inject 154 private ResourceFactory resourceFactory; 155 156 private DbPlatform dbPlatForm; 157 158 private String rdfTables; 159 160 private static final RowMapper<RdfType> RDF_TYPE_ROW_MAPPER = (rs, rowNum) -> 161 new RdfType(rs.getLong("id"), rs.getString("rdf_type_uri")); 162 163 /** 164 * Setup database table and connection 165 */ 166 @PostConstruct 167 public void setup() { 168 this.dbPlatForm = DbPlatform.fromDataSource(this.dataSource); 169 final var ddl = lookupDdl(); 170 LOGGER.debug("Applying ddl: {}", ddl); 171 DatabasePopulatorUtils.execute( 172 new ResourceDatabasePopulator(new DefaultResourceLoader().getResource("classpath:" + ddl)), 173 this.dataSource); 174 this.jdbcTemplate = getNamedParameterJdbcTemplate(); 175 176 jdbcInsertResource = new SimpleJdbcInsert(this.jdbcTemplate.getJdbcTemplate()) 177 .withTableName(SIMPLE_SEARCH_TABLE) 178 .usingGeneratedKeyColumns(ID_COLUMN); 179 180 this.rdfTables = RDF_TYPE_TABLE.replace(GROUP_CONCAT_FUNCTION, 181 isPostgres() ? POSTGRES_GROUP_CONCAT_FUNCTION : DEFAULT_GROUP_CONCAT_FUNCTION); 182 } 183 184 private String lookupDdl() { 185 return DDL_MAP.get(dbPlatForm); 186 } 187 188 private NamedParameterJdbcTemplate getNamedParameterJdbcTemplate() { 189 return new NamedParameterJdbcTemplate(this.dataSource); 190 } 191 192 @Override 193 public SearchResult doSearch(final SearchParameters parameters) throws InvalidQueryException { 194 //translate parameters into a SQL query 195 final MapSqlParameterSource parameterSource = new MapSqlParameterSource(); 196 final var whereClauses = new ArrayList<String>(); 197 final var conditions = parameters.getConditions(); 198 for (int i = 0; i < conditions.size(); i++) { 199 addWhereClause(i, parameterSource, whereClauses, conditions.get(i)); 200 } 201 202 final var fields = parameters.getFields().stream().map(Condition.Field::toString).collect(Collectors.toList()); 203 final boolean containsRDFTypeField = fields.contains(RDF_TYPE.toString()); 204 if (containsRDFTypeField) { 205 whereClauses.add("s.id = r.resource_id"); 206 whereClauses.add("r.resource_id = r_filter.resource_id"); 207 } 208 209 final var sql = 210 new StringBuilder("SELECT " + String.join(",", fields) + " FROM " + SIMPLE_SEARCH_TABLE + " s"); 211 212 if (containsRDFTypeField) { 213 sql.append(rdfTables); 214 var rdfTypeUriParamValue = "*"; 215 for (final Condition condition: conditions) { 216 if (condition.getField().equals(RDF_TYPE)) { 217 rdfTypeUriParamValue = condition.getObject(); 218 break; 219 } 220 } 221 parameterSource.addValue(RDF_TYPE_URI_PARAM, convertToSqlLikeWildcard(rdfTypeUriParamValue)); 222 } 223 224 if (!whereClauses.isEmpty()) { 225 sql.append(" WHERE "); 226 for (final var it = whereClauses.iterator(); it.hasNext(); ) { 227 sql.append(it.next()); 228 if (it.hasNext()) { 229 sql.append(" AND "); 230 } 231 } 232 } 233 sql.append(" ORDER BY " + parameters.getOrderBy() + " " + parameters.getOrder()); 234 sql.append(" LIMIT :limit OFFSET :offset"); 235 236 parameterSource.addValue("limit", parameters.getMaxResults()); 237 parameterSource.addValue("offset", parameters.getOffset()); 238 239 final var rowMapper = new RowMapper<Map<String, Object>>() { 240 @Override 241 public Map<String, Object> mapRow(final ResultSet rs, final int rowNum) throws SQLException { 242 final Map<String, Object> map = new HashMap<>(); 243 for (final String f : fields) { 244 final var fieldStr = f.toString(); 245 var value = rs.getObject(fieldStr); 246 if (value instanceof Timestamp) { 247 //format as iso instant if timestamp 248 value = ISO_INSTANT.format(Instant.ofEpochMilli(((Timestamp) value).getTime())); 249 } else if (f.equals(RDF_TYPE.toString())) { 250 //convert the comma-separate string to an array for rdf_type 251 value = value.toString().split(","); 252 } 253 map.put(fieldStr, value); 254 } 255 return map; 256 } 257 }; 258 259 final List<Map<String, Object>> items = jdbcTemplate.query(sql.toString(), parameterSource, rowMapper); 260 final var pagination = new PaginationInfo(parameters.getMaxResults(), parameters.getOffset()); 261 LOGGER.debug("Search query with parameters: {} - {}", sql, parameters); 262 return new SearchResult(items, pagination); 263 } 264 265 private void addWhereClause(final int paramCount, final MapSqlParameterSource parameterSource, 266 final List<String> whereClauses, 267 final Condition condition) throws InvalidQueryException { 268 final var field = condition.getField(); 269 final var operation = condition.getOperator(); 270 var object = condition.getObject(); 271 final var paramName = "param" + paramCount; 272 if ((field.equals(FEDORA_ID) || field.equals(MIME_TYPE)) && 273 condition.getOperator().equals(Condition.Operator.EQ)) { 274 if (!object.equals("*")) { 275 final String whereClause; 276 if (object.contains("*")) { 277 object = convertToSqlLikeWildcard(object); 278 whereClause = field + " like :" + paramName; 279 } else { 280 whereClause = field + " = :" + paramName; 281 } 282 283 whereClauses.add("s." + whereClause); 284 parameterSource.addValue(paramName, object); 285 } 286 } else if (field.equals(Condition.Field.CREATED) || field.equals(Condition.Field.MODIFIED)) { 287 //parse date 288 try { 289 final var instant = InstantParser.parse(object); 290 whereClauses.add("s." + field + " " + operation.getStringValue() + " :" + paramName); 291 parameterSource.addValue(paramName, new Timestamp(instant.toEpochMilli()), Types.TIMESTAMP); 292 } catch (final Exception ex) { 293 throw new InvalidQueryException(ex.getMessage()); 294 } 295 } else if (field.equals(CONTENT_SIZE)) { 296 try { 297 whereClauses.add(field + " " + operation.getStringValue() + 298 " :" + paramName); 299 parameterSource.addValue(paramName, Long.parseLong(object), Types.INTEGER); 300 } catch (final Exception ex) { 301 throw new InvalidQueryException(ex.getMessage()); 302 } 303 } else if (field.equals(RDF_TYPE) && condition.getOperator().equals(Condition.Operator.EQ) ) { 304 //allowed but no where clause added here. 305 } else { 306 throw new InvalidQueryException("Condition not supported: \"" + condition + "\""); 307 } 308 } 309 310 private String convertToSqlLikeWildcard(final String value) { 311 return value.replace("*", "%"); 312 } 313 314 @Override 315 public void addUpdateIndex(final ResourceHeaders resourceHeaders) { 316 addUpdateIndex(null, resourceHeaders); 317 } 318 319 @Transactional 320 @Override 321 public void addUpdateIndex(final String txId, final ResourceHeaders resourceHeaders) { 322 final var fedoraId = resourceHeaders.getId(); 323 final var fullId = fedoraId.getFullId(); 324 325 if (fedoraId.isAcl() || fedoraId.isMemento()) { 326 LOGGER.debug("The search index does not include acls or mementos. Ignoring resource {}", fullId); 327 return; 328 } 329 330 final var selectParams = new MapSqlParameterSource(); 331 selectParams.addValue(FEDORA_ID_PARAM, fullId); 332 final var result = 333 jdbcTemplate.queryForList(SELECT_BY_FEDORA_ID, 334 selectParams); 335 try { 336 final var fedoraResource = resourceFactory.getResource(txId, fedoraId); 337 final var rdfTypes = fedoraResource.getTypes(); 338 final var rdfTypeIds = findOrCreateRdfTypesInDb(rdfTypes); 339 final var params = new MapSqlParameterSource(); 340 params.addValue(FEDORA_ID_PARAM, fullId); 341 params.addValue(MODIFIED_PARAM, new Timestamp(resourceHeaders.getLastModifiedDate().toEpochMilli())); 342 params.addValue(MIME_TYPE_PARAM, resourceHeaders.getMimeType()); 343 params.addValue(CONTENT_SIZE_PARAM, resourceHeaders.getContentSize()); 344 final var exists = result.size() > 0; 345 final Long resourcePrimaryKey; 346 if (exists) { 347 resourcePrimaryKey = (Long) result.get(0).get(ID_COLUMN); 348 jdbcTemplate.update(UPDATE_INDEX_SQL, params); 349 //delete rdf_type associations 350 deleteRdfTypeAssociations(resourcePrimaryKey); 351 } else { 352 params.addValue(CREATED_PARAM, new Timestamp(resourceHeaders.getCreatedDate().toEpochMilli())); 353 resourcePrimaryKey = jdbcInsertResource.executeAndReturnKey(params).longValue(); 354 } 355 insertRdfTypeAssociations(rdfTypeIds, resourcePrimaryKey); 356 } catch (final Exception e) { 357 throw new RepositoryRuntimeException("Failed add/updated the search index for : " + fullId, e); 358 } 359 } 360 361 private void insertRdfTypeAssociations(final List<Long> rdfTypeIds, final Long resourceId) { 362 //add rdf type associations 363 final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>(); 364 for (final var rdfTypeId : rdfTypeIds) { 365 final var assocParams = new MapSqlParameterSource(); 366 assocParams.addValue(RESOURCE_ID_PARAM, resourceId); 367 assocParams.addValue(RDF_TYPE_ID_PARAM, rdfTypeId); 368 parameterSourcesList.add(assocParams); 369 } 370 final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]); 371 jdbcTemplate.batchUpdate(INSERT_RDF_TYPE_ASSOC, psArray); 372 } 373 374 private void deleteRdfTypeAssociations(final Long resourceId) { 375 final var deleteParams = new MapSqlParameterSource(); 376 deleteParams.addValue(RESOURCE_ID_PARAM, resourceId); 377 jdbcTemplate.update(DELETE_RDF_TYPE_ASSOCIATIONS, 378 deleteParams); 379 } 380 381 private List<Long> findOrCreateRdfTypesInDb(final List<URI> rdfTypes) { 382 final List<String> rdfTypes_str = rdfTypes.stream().map(URI::toString).collect(Collectors.toList()); 383 384 final List<RdfType> results = jdbcTemplate.query(SELECT_RDF_TYPE_ID, 385 Map.of(RDF_TYPE_URI_PARAM, rdfTypes_str), RDF_TYPE_ROW_MAPPER); 386 // List of existing type ids. 387 final List<Long> rdfTypeIds = new ArrayList<>(); 388 // List of existing type uris. 389 final Set<String> rdfTypeUris = new HashSet<>(); 390 for (final RdfType type : results) { 391 rdfTypeIds.add(type.getTypeId()); 392 rdfTypeUris.add(type.getTypeUri()); 393 } 394 // Type uris that don't already have a record. Needs to be a set to avoid inserting the same URI and 395 final var missingUris = rdfTypes_str.stream().filter(t -> !rdfTypeUris.contains(t)) 396 .collect(Collectors.toSet()); 397 398 if (!missingUris.isEmpty()) { 399 final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>(); 400 for (final var uri : missingUris) { 401 LOGGER.debug("Adding rdf type uri: " + uri); 402 final var ps = new MapSqlParameterSource(); 403 ps.addValue(RDF_TYPE_URI_PARAM, uri); 404 parameterSourcesList.add(ps); 405 } 406 // Batch insert all the records. 407 final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]); 408 jdbcTemplate.batchUpdate(INSERT_RDF_TYPE.get(this.dbPlatForm), psArray); 409 // Do a single query for the ID to all the URIs we just inserted. 410 final List<RdfType> createdIds = jdbcTemplate.query(SELECT_RDF_TYPE_ID, 411 Map.of(RDF_TYPE_URI_PARAM, missingUris), RDF_TYPE_ROW_MAPPER); 412 if (createdIds.size() != missingUris.size()) { 413 throw new RepositoryRuntimeException("Did not select all the items we inserted into the table"); 414 } 415 rdfTypeIds.addAll(createdIds.stream().map(RdfType::getTypeId).collect(Collectors.toList())); 416 } 417 return rdfTypeIds; 418 } 419 420 /** 421 * Simple class to map rdf types. 422 */ 423 private static class RdfType { 424 private String typeUri; 425 private Long typeId; 426 427 public RdfType(final Long id, final String uri) { 428 typeId = id; 429 typeUri = uri; 430 } 431 432 public Long getTypeId() { 433 return typeId; 434 } 435 436 public String getTypeUri() { 437 return typeUri; 438 } 439 } 440 441 @Override 442 public void removeFromIndex(final FedoraId fedoraId) { 443 try { 444 final var params = new MapSqlParameterSource(); 445 params.addValue(FEDORA_ID_PARAM, fedoraId.getFullId()); 446 jdbcTemplate.update(DELETE_FROM_INDEX_SQL, params); 447 } catch (final DataAccessException ex) { 448 throw new RepositoryRuntimeException("Failed to delete search index entry for " + fedoraId.getFullId()); 449 } 450 } 451 452 @Override 453 public void reset() { 454 try (final var conn = this.dataSource.getConnection()) { 455 final var statement = conn.createStatement(); 456 for (final var sql : toggleForeignKeyChecks(false)) { 457 statement.addBatch(sql); 458 } 459 statement.addBatch(truncateTable(SEARCH_RESOURCE_RDF_TYPE_TABLE)); 460 statement.addBatch(truncateTable(SIMPLE_SEARCH_TABLE)); 461 statement.addBatch(truncateTable(SEARCH_RDF_TYPE_TABLE)); 462 for (final var sql : toggleForeignKeyChecks(true)) { 463 statement.addBatch(sql); 464 } 465 statement.executeBatch(); 466 } catch (final SQLException e) { 467 throw new RepositoryRuntimeException("Failed to truncate search index tables", e); 468 } 469 } 470 471 private List<String> toggleForeignKeyChecks(final boolean enable) { 472 473 if (isPostgres()) { 474 return List.of( 475 togglePostgresTriggers(SEARCH_RESOURCE_RDF_TYPE_TABLE, enable), 476 togglePostgresTriggers(SEARCH_RDF_TYPE_TABLE, enable), 477 togglePostgresTriggers(SIMPLE_SEARCH_TABLE, enable) 478 ); 479 } else { 480 return List.of("SET FOREIGN_KEY_CHECKS = " + (enable ? 1 : 0) + ";"); 481 } 482 } 483 484 private boolean isPostgres() { 485 return dbPlatForm.equals(POSTGRESQL); 486 } 487 488 private String togglePostgresTriggers(final String tableName, final boolean enable) { 489 return "ALTER TABLE " + tableName + " " + 490 (enable ? "ENABLE" : "DISABLE") + " TRIGGER ALL;"; 491 } 492 493 private String truncateTable(final String tableName) { 494 final var addCascade = isPostgres(); 495 return "TRUNCATE TABLE " + tableName + (addCascade ? " CASCADE" : "") + ";"; 496 } 497 498}