001/* 002 * Licensed to DuraSpace under one or more contributor license agreements. 003 * See the NOTICE file distributed with this work for additional information 004 * regarding copyright ownership. 005 * 006 * DuraSpace licenses this file to you under the Apache License, 007 * Version 2.0 (the "License"); you may not use this file except in 008 * compliance with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.fcrepo.search.impl; 019 020import org.fcrepo.common.db.DbPlatform; 021import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 022import org.fcrepo.kernel.api.identifiers.FedoraId; 023import org.fcrepo.kernel.api.models.ResourceFactory; 024import org.fcrepo.kernel.api.models.ResourceHeaders; 025import org.fcrepo.search.api.Condition; 026import org.fcrepo.search.api.InvalidQueryException; 027import org.fcrepo.search.api.PaginationInfo; 028import org.fcrepo.search.api.SearchIndex; 029import org.fcrepo.search.api.SearchParameters; 030import org.fcrepo.search.api.SearchResult; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033import org.springframework.core.io.DefaultResourceLoader; 034import org.springframework.dao.DataAccessException; 035import org.springframework.jdbc.core.RowMapper; 036import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; 037import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; 038import org.springframework.jdbc.core.simple.SimpleJdbcInsert; 039import org.springframework.jdbc.datasource.init.DatabasePopulatorUtils; 040import org.springframework.jdbc.datasource.init.ResourceDatabasePopulator; 041import org.springframework.stereotype.Component; 042import org.springframework.transaction.annotation.Transactional; 043 044import javax.annotation.PostConstruct; 045import javax.inject.Inject; 046import javax.sql.DataSource; 047import java.net.URI; 048import java.sql.ResultSet; 049import java.sql.SQLException; 050import java.sql.Timestamp; 051import java.sql.Types; 052import java.time.Instant; 053import java.util.ArrayList; 054import java.util.HashMap; 055import java.util.HashSet; 056import java.util.List; 057import java.util.Map; 058import java.util.Set; 059import java.util.stream.Collectors; 060 061import static java.time.format.DateTimeFormatter.ISO_INSTANT; 062import static java.util.Collections.EMPTY_LIST; 063import static org.fcrepo.common.db.DbPlatform.H2; 064import static org.fcrepo.common.db.DbPlatform.MARIADB; 065import static org.fcrepo.common.db.DbPlatform.MYSQL; 066import static org.fcrepo.common.db.DbPlatform.POSTGRESQL; 067import static org.fcrepo.search.api.Condition.Field.CONTENT_SIZE; 068import static org.fcrepo.search.api.Condition.Field.FEDORA_ID; 069import static org.fcrepo.search.api.Condition.Field.MIME_TYPE; 070import static org.fcrepo.search.api.Condition.Field.RDF_TYPE; 071 072 073/** 074 * An implementation of the {@link SearchIndex} 075 * 076 * @author dbernstein 077 * @author whikloj 078 */ 079@Component("searchIndexImpl") 080public class DbSearchIndexImpl implements SearchIndex { 081 public static final String SELECT_RDF_TYPE_ID = "select id, rdf_type_uri from search_rdf_type where rdf_type_uri " + 082 "in (:rdf_type_uri)"; 083 private static final Logger LOGGER = LoggerFactory.getLogger(DbSearchIndexImpl.class); 084 private static final String SIMPLE_SEARCH_TABLE = "simple_search"; 085 private static final String DELETE_FROM_INDEX_SQL = "DELETE FROM simple_search WHERE fedora_id = :fedora_id;"; 086 private static final String UPDATE_INDEX_SQL = 087 "UPDATE simple_search SET modified = :modified, content_size = :content_size, mime_type =:mime_type " + 088 "WHERE fedora_id = :fedora_id;"; 089 private static final String SELECT_BY_FEDORA_ID = 090 "SELECT id FROM simple_search WHERE fedora_id = :fedora_id"; 091 private static final String FEDORA_ID_PARAM = "fedora_id"; 092 private static final String MODIFIED_PARAM = "modified"; 093 private static final String CONTENT_SIZE_PARAM = "content_size"; 094 private static final String MIME_TYPE_PARAM = "mime_type"; 095 private static final String CREATED_PARAM = "created"; 096 private static final String DELETE_RDF_TYPE_ASSOCIATIONS = 097 "DELETE FROM search_resource_rdf_type where resource_id = :resource_id"; 098 private static final String RDF_TYPE_TABLE = ", (SELECT rrt.resource_id, group_concat_function as rdf_type " + 099 "from search_resource_rdf_type rrt, " + 100 "search_rdf_type rt WHERE rrt.rdf_type_id = rt.id group by rrt.resource_id) r, " + 101 "(SELECT rrt.resource_id from search_resource_rdf_type rrt, search_rdf_type rt " + 102 "WHERE rt.rdf_type_uri like :rdf_type_uri and rrt.rdf_type_id = rt.id group by rrt.resource_id) r_filter"; 103 private static final String DEFAULT_DDL = "sql/default-search-index.sql"; 104 105 private static final Map<DbPlatform, String> DDL_MAP = Map.of( 106 MYSQL, DEFAULT_DDL, 107 H2, DEFAULT_DDL, 108 POSTGRESQL, "sql/postgresql-search-index.sql", 109 MARIADB, DEFAULT_DDL 110 ); 111 public static final String SEARCH_RESOURCE_RDF_TYPE_TABLE = "search_resource_rdf_type"; 112 public static final String RESOURCE_ID_PARAM = "resource_id"; 113 public static final String RDF_TYPE_ID_PARAM = "rdf_type_id"; 114 public static final String RDF_TYPE_URI_PARAM = "rdf_type_uri"; 115 public static final String SEARCH_RDF_TYPE_TABLE = "search_rdf_type"; 116 public static final String ID_COLUMN = "id"; 117 private static final String GROUP_CONCAT_FUNCTION = "group_concat_function"; 118 private static final String POSTGRES_GROUP_CONCAT_FUNCTION = "STRING_AGG(rt.rdf_type_uri, ',')"; 119 private static final String DEFAULT_GROUP_CONCAT_FUNCTION = "GROUP_CONCAT(distinct rt.rdf_type_uri " + 120 "ORDER BY rt.rdf_type_uri ASC SEPARATOR ',')"; 121 122 /* 123 * Insert an association between a RDF type and a resource. 124 */ 125 private static final String INSERT_RDF_TYPE_ASSOC = "INSERT INTO " + SEARCH_RESOURCE_RDF_TYPE_TABLE + " (" + 126 RESOURCE_ID_PARAM + ", " + RDF_TYPE_ID_PARAM + ") VALUES (:resource_id, :rdf_type_id)"; 127 128 /* 129 * Insert a new RDF type into the RDF type table. 130 */ 131 private static final String INSERT_RDF_TYPE_MYSQLMARIA = "INSERT IGNORE INTO " + SEARCH_RDF_TYPE_TABLE + " (" + 132 RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)"; 133 134 private static final String INSERT_RDF_TYPE_POSTGRES = "INSERT INTO " + SEARCH_RDF_TYPE_TABLE + " (" + 135 RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri) ON CONFLICT DO NOTHING"; 136 137 private static final String INSERT_RDF_TYPE_H2 = "MERGE INTO " + SEARCH_RDF_TYPE_TABLE + " (" + 138 RDF_TYPE_URI_PARAM + ") KEY (" + RDF_TYPE_URI_PARAM + ") VALUES (:rdf_type_uri)"; 139 140 private static final Map<DbPlatform, String> INSERT_RDF_TYPE = Map.of( 141 MYSQL, INSERT_RDF_TYPE_MYSQLMARIA, 142 MARIADB, INSERT_RDF_TYPE_MYSQLMARIA, 143 POSTGRESQL, INSERT_RDF_TYPE_POSTGRES, 144 H2, INSERT_RDF_TYPE_H2 145 ); 146 147 @Inject 148 private DataSource dataSource; 149 150 private NamedParameterJdbcTemplate jdbcTemplate; 151 152 private SimpleJdbcInsert jdbcInsertResource; 153 154 @Inject 155 private ResourceFactory resourceFactory; 156 157 private DbPlatform dbPlatForm; 158 159 private String rdfTables; 160 161 private static final RowMapper<RdfType> RDF_TYPE_ROW_MAPPER = (rs, rowNum) -> 162 new RdfType(rs.getLong("id"), rs.getString("rdf_type_uri")); 163 164 /** 165 * Setup database table and connection 166 */ 167 @PostConstruct 168 public void setup() { 169 this.dbPlatForm = DbPlatform.fromDataSource(this.dataSource); 170 final var ddl = lookupDdl(); 171 LOGGER.debug("Applying ddl: {}", ddl); 172 DatabasePopulatorUtils.execute( 173 new ResourceDatabasePopulator(new DefaultResourceLoader().getResource("classpath:" + ddl)), 174 this.dataSource); 175 this.jdbcTemplate = getNamedParameterJdbcTemplate(); 176 177 jdbcInsertResource = new SimpleJdbcInsert(this.jdbcTemplate.getJdbcTemplate()) 178 .withTableName(SIMPLE_SEARCH_TABLE) 179 .usingGeneratedKeyColumns(ID_COLUMN); 180 181 this.rdfTables = RDF_TYPE_TABLE.replace(GROUP_CONCAT_FUNCTION, 182 isPostgres() ? POSTGRES_GROUP_CONCAT_FUNCTION : DEFAULT_GROUP_CONCAT_FUNCTION); 183 } 184 185 private String lookupDdl() { 186 return DDL_MAP.get(dbPlatForm); 187 } 188 189 private NamedParameterJdbcTemplate getNamedParameterJdbcTemplate() { 190 return new NamedParameterJdbcTemplate(this.dataSource); 191 } 192 193 @Override 194 public SearchResult doSearch(final SearchParameters parameters) throws InvalidQueryException { 195 //translate parameters into a SQL query 196 final MapSqlParameterSource parameterSource = new MapSqlParameterSource(); 197 final var whereClauses = new ArrayList<String>(); 198 final var conditions = parameters.getConditions(); 199 for (int i = 0; i < conditions.size(); i++) { 200 addWhereClause(i, parameterSource, whereClauses, conditions.get(i)); 201 } 202 203 final var fields = parameters.getFields().stream().map(Condition.Field::toString).collect(Collectors.toList()); 204 final boolean containsRDFTypeField = fields.contains(RDF_TYPE.toString()); 205 if (containsRDFTypeField) { 206 whereClauses.add("s.id = r.resource_id"); 207 whereClauses.add("r.resource_id = r_filter.resource_id"); 208 } 209 210 final var sql = 211 new StringBuilder("SELECT " + String.join(",", fields) + " FROM " + SIMPLE_SEARCH_TABLE + " s"); 212 213 if (containsRDFTypeField) { 214 sql.append(rdfTables); 215 var rdfTypeUriParamValue = "*"; 216 for (final Condition condition: conditions) { 217 if (condition.getField().equals(RDF_TYPE)) { 218 rdfTypeUriParamValue = condition.getObject(); 219 break; 220 } 221 } 222 parameterSource.addValue(RDF_TYPE_URI_PARAM, convertToSqlLikeWildcard(rdfTypeUriParamValue)); 223 } 224 225 if (!whereClauses.isEmpty()) { 226 sql.append(" WHERE "); 227 for (final var it = whereClauses.iterator(); it.hasNext(); ) { 228 sql.append(it.next()); 229 if (it.hasNext()) { 230 sql.append(" AND "); 231 } 232 } 233 } 234 sql.append(" ORDER BY " + parameters.getOrderBy() + " " + parameters.getOrder()); 235 sql.append(" LIMIT :limit OFFSET :offset"); 236 237 parameterSource.addValue("limit", parameters.getMaxResults()); 238 parameterSource.addValue("offset", parameters.getOffset()); 239 240 final var rowMapper = new RowMapper<Map<String, Object>>() { 241 @Override 242 public Map<String, Object> mapRow(final ResultSet rs, final int rowNum) throws SQLException { 243 final Map<String, Object> map = new HashMap<>(); 244 for (final String f : fields) { 245 final var fieldStr = f.toString(); 246 var value = rs.getObject(fieldStr); 247 if (value instanceof Timestamp) { 248 //format as iso instant if timestamp 249 value = ISO_INSTANT.format(Instant.ofEpochMilli(((Timestamp) value).getTime())); 250 } else if (f.equals(RDF_TYPE.toString())) { 251 //convert the comma-separate string to an array for rdf_type 252 value = value.toString().split(","); 253 } 254 map.put(fieldStr, value); 255 } 256 return map; 257 } 258 }; 259 260 final List<Map<String, Object>> items = jdbcTemplate.query(sql.toString(), parameterSource, rowMapper); 261 final var pagination = new PaginationInfo(parameters.getMaxResults(), parameters.getOffset()); 262 LOGGER.debug("Search query with parameters: {} - {}", sql, parameters); 263 return new SearchResult(items, pagination); 264 } 265 266 private void addWhereClause(final int paramCount, final MapSqlParameterSource parameterSource, 267 final List<String> whereClauses, 268 final Condition condition) throws InvalidQueryException { 269 final var field = condition.getField(); 270 final var operation = condition.getOperator(); 271 var object = condition.getObject(); 272 final var paramName = "param" + paramCount; 273 if ((field.equals(FEDORA_ID) || field.equals(MIME_TYPE)) && 274 condition.getOperator().equals(Condition.Operator.EQ)) { 275 if (!object.equals("*")) { 276 final String whereClause; 277 if (object.contains("*")) { 278 object = convertToSqlLikeWildcard(object); 279 whereClause = field + " like :" + paramName; 280 } else { 281 whereClause = field + " = :" + paramName; 282 } 283 284 whereClauses.add("s." + whereClause); 285 parameterSource.addValue(paramName, object); 286 } 287 } else if (field.equals(Condition.Field.CREATED) || field.equals(Condition.Field.MODIFIED)) { 288 //parse date 289 try { 290 final var instant = InstantParser.parse(object); 291 whereClauses.add("s." + field + " " + operation.getStringValue() + " :" + paramName); 292 parameterSource.addValue(paramName, new Timestamp(instant.toEpochMilli()), Types.TIMESTAMP); 293 } catch (final Exception ex) { 294 throw new InvalidQueryException(ex.getMessage()); 295 } 296 } else if (field.equals(CONTENT_SIZE)) { 297 try { 298 whereClauses.add(field + " " + operation.getStringValue() + 299 " :" + paramName); 300 parameterSource.addValue(paramName, Long.parseLong(object), Types.INTEGER); 301 } catch (final Exception ex) { 302 throw new InvalidQueryException(ex.getMessage()); 303 } 304 } else if (field.equals(RDF_TYPE) && condition.getOperator().equals(Condition.Operator.EQ) ) { 305 //allowed but no where clause added here. 306 } else { 307 throw new InvalidQueryException("Condition not supported: \"" + condition + "\""); 308 } 309 } 310 311 private String convertToSqlLikeWildcard(final String value) { 312 return value.replace("*", "%"); 313 } 314 315 @Override 316 public void addUpdateIndex(final ResourceHeaders resourceHeaders) { 317 addUpdateIndex(null, resourceHeaders); 318 } 319 320 @Transactional 321 @Override 322 public void addUpdateIndex(final String txId, final ResourceHeaders resourceHeaders) { 323 final var fedoraId = resourceHeaders.getId(); 324 final var fullId = fedoraId.getFullId(); 325 326 if (fedoraId.isAcl() || fedoraId.isMemento()) { 327 LOGGER.debug("The search index does not include acls or mementos. Ignoring resource {}", fullId); 328 return; 329 } 330 331 final var selectParams = new MapSqlParameterSource(); 332 selectParams.addValue(FEDORA_ID_PARAM, fullId); 333 final var result = 334 jdbcTemplate.queryForList(SELECT_BY_FEDORA_ID, 335 selectParams); 336 try { 337 final var fedoraResource = resourceFactory.getResource(txId, fedoraId); 338 final var rdfTypes = fedoraResource.getTypes(); 339 final var rdfTypeIds = findOrCreateRdfTypesInDb(rdfTypes); 340 final var params = new MapSqlParameterSource(); 341 params.addValue(FEDORA_ID_PARAM, fullId); 342 params.addValue(MODIFIED_PARAM, new Timestamp(resourceHeaders.getLastModifiedDate().toEpochMilli())); 343 params.addValue(MIME_TYPE_PARAM, resourceHeaders.getMimeType()); 344 params.addValue(CONTENT_SIZE_PARAM, resourceHeaders.getContentSize()); 345 final var exists = result.size() > 0; 346 final Long resourcePrimaryKey; 347 if (exists) { 348 resourcePrimaryKey = (Long) result.get(0).get(ID_COLUMN); 349 jdbcTemplate.update(UPDATE_INDEX_SQL, params); 350 //delete rdf_type associations 351 deleteRdfTypeAssociations(resourcePrimaryKey); 352 } else { 353 params.addValue(CREATED_PARAM, new Timestamp(resourceHeaders.getCreatedDate().toEpochMilli())); 354 resourcePrimaryKey = jdbcInsertResource.executeAndReturnKey(params).longValue(); 355 } 356 insertRdfTypeAssociations(rdfTypeIds, resourcePrimaryKey); 357 } catch (final Exception e) { 358 throw new RepositoryRuntimeException("Failed add/updated the search index for : " + fullId, e); 359 } 360 } 361 362 private void insertRdfTypeAssociations(final List<Long> rdfTypeIds, final Long resourceId) { 363 //add rdf type associations 364 final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>(); 365 for (final var rdfTypeId : rdfTypeIds) { 366 final var assocParams = new MapSqlParameterSource(); 367 assocParams.addValue(RESOURCE_ID_PARAM, resourceId); 368 assocParams.addValue(RDF_TYPE_ID_PARAM, rdfTypeId); 369 parameterSourcesList.add(assocParams); 370 } 371 final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]); 372 jdbcTemplate.batchUpdate(INSERT_RDF_TYPE_ASSOC, psArray); 373 } 374 375 private void deleteRdfTypeAssociations(final Long resourceId) { 376 final var deleteParams = new MapSqlParameterSource(); 377 deleteParams.addValue(RESOURCE_ID_PARAM, resourceId); 378 jdbcTemplate.update(DELETE_RDF_TYPE_ASSOCIATIONS, 379 deleteParams); 380 } 381 382 private List<Long> findOrCreateRdfTypesInDb(final List<URI> rdfTypes) { 383 final List<String> rdfTypes_str = rdfTypes.stream().map(URI::toString).collect(Collectors.toList()); 384 385 final List<RdfType> results = jdbcTemplate.query(SELECT_RDF_TYPE_ID, 386 Map.of(RDF_TYPE_URI_PARAM, rdfTypes_str), RDF_TYPE_ROW_MAPPER); 387 // List of existing type ids. 388 final List<Long> rdfTypeIds = new ArrayList<>(); 389 // List of existing type uris. 390 final Set<String> rdfTypeUris = new HashSet<>(); 391 for (final RdfType type : results) { 392 rdfTypeIds.add(type.getTypeId()); 393 rdfTypeUris.add(type.getTypeUri()); 394 } 395 // Type uris that don't already have a record. Needs to be a set to avoid inserting the same URI and 396 final var missingUris = rdfTypes_str.stream().filter(t -> !rdfTypeUris.contains(t)) 397 .collect(Collectors.toSet()); 398 399 if (!missingUris.isEmpty()) { 400 final List<MapSqlParameterSource> parameterSourcesList = new ArrayList<>(); 401 for (final var uri : missingUris) { 402 LOGGER.debug("Adding rdf type uri: " + uri); 403 final var ps = new MapSqlParameterSource(); 404 ps.addValue(RDF_TYPE_URI_PARAM, uri); 405 parameterSourcesList.add(ps); 406 } 407 // Batch insert all the records. 408 final MapSqlParameterSource[] psArray = parameterSourcesList.toArray(new MapSqlParameterSource[0]); 409 jdbcTemplate.batchUpdate(INSERT_RDF_TYPE.get(this.dbPlatForm), psArray); 410 // Do a single query for the ID to all the URIs we just inserted. 411 final List<RdfType> createdIds = jdbcTemplate.query(SELECT_RDF_TYPE_ID, 412 Map.of(RDF_TYPE_URI_PARAM, missingUris), RDF_TYPE_ROW_MAPPER); 413 if (createdIds.size() != missingUris.size()) { 414 throw new RepositoryRuntimeException("Did not select all the items we inserted into the table"); 415 } 416 rdfTypeIds.addAll(createdIds.stream().map(RdfType::getTypeId).collect(Collectors.toList())); 417 } 418 return rdfTypeIds; 419 } 420 421 /** 422 * Simple class to map rdf types. 423 */ 424 private static class RdfType { 425 private String typeUri; 426 private Long typeId; 427 428 public RdfType(final Long id, final String uri) { 429 typeId = id; 430 typeUri = uri; 431 } 432 433 public Long getTypeId() { 434 return typeId; 435 } 436 437 public String getTypeUri() { 438 return typeUri; 439 } 440 } 441 442 @Override 443 public void removeFromIndex(final FedoraId fedoraId) { 444 try { 445 final var params = new MapSqlParameterSource(); 446 params.addValue(FEDORA_ID_PARAM, fedoraId.getFullId()); 447 jdbcTemplate.update(DELETE_FROM_INDEX_SQL, params); 448 } catch (final DataAccessException ex) { 449 throw new RepositoryRuntimeException("Failed to delete search index entry for " + fedoraId.getFullId()); 450 } 451 } 452 453 @Override 454 public void reset() { 455 try (final var conn = this.dataSource.getConnection()) { 456 final var statement = conn.createStatement(); 457 for (final var sql : toggleForeignKeyChecks(false)) { 458 statement.addBatch(sql); 459 } 460 statement.addBatch(truncateTable(SEARCH_RDF_TYPE_TABLE)); 461 statement.addBatch(truncateTable(SEARCH_RESOURCE_RDF_TYPE_TABLE)); 462 statement.addBatch(truncateTable(SIMPLE_SEARCH_TABLE)); 463 for (final var sql : toggleForeignKeyChecks(true)) { 464 statement.addBatch(sql); 465 } 466 statement.executeBatch(); 467 } catch (final SQLException e) { 468 throw new RepositoryRuntimeException("Failed to truncate search index tables", e); 469 } 470 } 471 472 private List<String> toggleForeignKeyChecks(final boolean enable) { 473 474 if (isPostgres()) { 475 return EMPTY_LIST; 476 } else { 477 return List.of("SET FOREIGN_KEY_CHECKS = " + (enable ? 1 : 0) + ";"); 478 } 479 } 480 481 private boolean isPostgres() { 482 return dbPlatForm.equals(POSTGRESQL); 483 } 484 485 private String truncateTable(final String tableName) { 486 final var addCascade = isPostgres(); 487 return "TRUNCATE TABLE " + tableName + (addCascade ? " CASCADE" : "") + ";"; 488 } 489 490}