package de.julielab.gene.candidateretrieval;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.LoadingCache;
import de.julielab.gene.candidateretrieval.scoring.LuceneScorer;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorer;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.geneexpbase.candidateretrieval.CandidateCacheKey;
import de.julielab.geneexpbase.candidateretrieval.GeneCandidateRetrievalException;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneName;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.geneexpbase.scoring.*;
import de.julielab.geneexpbase.services.CacheService;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.ngram.NGramFilterFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;


public class LuceneCandidateRetrieval implements CandidateRetrieval {
    /**
     * When {@code false}, the injecting constructor throws a {@link GeneCandidateRetrievalException} if no gene
     * record index is configured; when {@code true} it only logs a warning in that case.
     */
    public static final boolean TEST_MODE = false;
    // Boolean query generators. NOTE(review): the meaning of the second constructor argument is defined by
    // BooleanQueryGenerator and is not visible here; judging from the constant names it presumably is the number
    // of clauses that may remain unmatched (-1 for a plain disjunction) - confirm.
    public static final QueryGenerator CONJUNCTION = new BooleanQueryGenerator(Occur.MUST, 0);
    public static final QueryGenerator DISJUNCTION = new BooleanQueryGenerator(Occur.SHOULD, -1);
    public static final QueryGenerator DISJUNCTION_MINUS_1 = new BooleanQueryGenerator(Occur.SHOULD, 1);
    public static final QueryGenerator DISJUNCTION_MINUS_2 = new BooleanQueryGenerator(Occur.SHOULD, 2);
    // NOTE(review): presumably generates queries over character n-grams of sizes 2 to 3 - confirm.
    public static final QueryGenerator NGRAM_2_3 = new NGramQueryGenerator(2, 3);
    /**
     * Conjunctive normal form query where all tokens must be found in any field.
     */
    public static final QueryGenerator GENE_RECORDS_CNF = new GeneRecordQueryGenerator();
    /**
     * Just like {@link #GENE_RECORDS_CNF} but with an additional {@link BooleanClause.Occur#SHOULD} clause only for synonym matches created via {@link GeneRecordSynonymsQueryGenerator}.
     */
    public static final QueryGenerator GENE_RECORDS_CNF_WITH_SYNONYMS = new GeneRecordQueryGenerator(false, false, true, true);
    /**
     * Puts all tokens on all fields in one large disjunction. Thus, not every token needs to match. Used for context scoring of gene names.
     */
    public static final QueryGenerator GENE_RECORDS_FLAT_DISJUNCTION = new GeneRecordQueryGenerator(false, true, false, false);
    public static final QueryGenerator GENE_RECORDS_DISMAX = new GeneRecordQueryGenerator(true);
    public static final QueryGenerator GENE_RECORDS_SYNONYMS_APPROX = new GeneRecordSynonymsQueryGenerator(false);
    public static final QueryGenerator GENE_RECORDS_SYNONYMS_EXACT = new GeneRecordSynonymsQueryGenerator(true);
    public static final String NAME_PRIO_DELIMITER = "__";
    /**
     * Name of the logger behind {@link #candidateLog}.
     */
    public static final String LOGGER_NAME_CANDIDATES = "de.julielab.jules.ae.genemapper.candidates";
    // Scorer type identifiers understood by setScorerType(int). LUCENE_SCORER is the type the injecting
    // constructor falls back to when no scorer type is configured.
    public static final int SIMPLE_SCORER = 0;
    public static final int TOKEN_JAROWINKLER_SCORER = 1;
    public static final int MAXENT_SCORER = 2;
    public static final int JAROWINKLER_SCORER = 3;
    public static final int LEVENSHTEIN_SCORER = 4;
    public static final int TFIDF = 5;
    public static final int LUCENE_SCORER = 10;
    /**
     * Default model for the MaxEntScorer. It is loaded as a class path resource unless another model is configured.
     */
    public static final String MAXENT_SCORER_MODEL = "/genemapper_jules_mallet.mod";
    public static final Logger candidateLog = LoggerFactory.getLogger(LOGGER_NAME_CANDIDATES);
    /**
     * The maximal number of hits Lucene returns for a query.
     */
    public static final int LUCENE_MAX_HITS = CandidateCacheKey.DEFAULT_MAX_CANDIDATES;
    private static final Logger log = LoggerFactory.getLogger(LuceneCandidateRetrieval.class);
    /**
     * This static map is supposed to make candidate caches available for all
     * instances of this class across the JVM. This is important since we often
     * use multiple gene-mapper instances in the same pipeline. It can save a
     * lot of time and also space.
     */
    private static final ConcurrentHashMap<String, LoadingCache<CandidateCacheKey, List<SynHit>>> caches = new ConcurrentHashMap<>();
    // JVM-wide statistics counters, exposed through the static getters of this class.
    // totalLuceneQueryTime accumulates System.nanoTime() differences; NOTE(review): the other time counters are
    // presumably nanoseconds as well - confirm where they are updated.
    private static final AtomicLong totalGeneRecordFieldLoadingTime = new AtomicLong();
    private static final AtomicLong totalCacheGettime = new AtomicLong();
    private static final AtomicLong totalCachePuttime = new AtomicLong();
    private static final AtomicLong totalLuceneQueryTime = new AtomicLong();
    private static final AtomicLong cacheHits = new AtomicLong();
    private static final AtomicLong cacheMisses = new AtomicLong();
    // Index searchers keyed by thread. Both keys and values are weakly referenced.
    private static final Cache<Thread, IndexSearcher> mentionIndexSearchers = CacheBuilder.newBuilder().weakKeys().weakValues().build();
    private static final Cache<Thread, IndexSearcher> geneRecordIndexSearchers = CacheBuilder.newBuilder().weakKeys().weakValues().build();
    private static final Cache<Thread, IndexSearcher> geneRecordOriginalNamesIndexSearchers = CacheBuilder.newBuilder().weakKeys().weakValues().build();
    // NOTE(review): presumably keyed by index path so that readers can be shared between instances - confirm.
    private static final Map<String, IndexReader> geneRecordIndexReaders = new ConcurrentHashMap<>();
    private static final Map<String, IndexReader> geneRecordOriginalNamesIndexReaders = new ConcurrentHashMap<>();
    private static final Map<String, IndexReader> nameCentricIndexReaders = new ConcurrentHashMap<>();
    // Maps the name of a full text index field to the GeneRecordHit setter that receives the field's values.
    private static final Map<String, BiConsumer<GeneRecordHit, String[]>> fullTextFieldSetter = Map.of(SynonymIndexFieldNames.GENERIF, GeneRecordHit::setGeneRifs, SynonymIndexFieldNames.INTERACTION, GeneRecordHit::setInteractions, SynonymIndexFieldNames.GODESC, GeneRecordHit::setGoDescriptors, SynonymIndexFieldNames.SUMMARY, GeneRecordHit::setSummaries);
    private static final AtomicInteger instanceCounter = new AtomicInteger(0);
    /**
     * Test hook: when non-null, the IDs of all hits returned by the main {@code getCandidates} method are added to this set.
     */
    public static Set<String> UNIT_TEST_GENE_ID_ACCUMULATION_SET;
    // Overwritten by every call of the injecting constructor.
    private static ExecutorService executorService;
    //    private static IndexSearcher geneRecordIndexSearcherInstance;
    // Scorer returned by getScorer(); its info and type are reported by getScorerInfo() and getScorerType().
    private final Scorer exactScorer;
    // Field weights read from the configuration by the injecting constructor; empty for the deprecated constructor.
    private final Map<String, Float> globalFieldWeights;
    private final Boolean useLuceneCandidateCache;
    private IndexSearcher geneRecordIndexSearcher;
    private IndexSearcher geneRecordOriginalNamesIndexSearcher;
    private IndexReader geneRecordIndexReader;
    private IndexReader geneRecordOriginalNamesIndexReader;
    private IndexSearcher nameCentricIndexSearcher;
    // Whitespace tokenizer followed by an n-gram filter with gram sizes 2 to 3; stays null if building it fails.
    private CustomAnalyzer ngramAnalyzer;
    // the model to be loaded for MaxEnt scorer
    // (can be specified in properties file)
    private String maxEntModel = MAXENT_SCORER_MODEL;
    private TermNormalizer normalizer;
    private Scorer approxScorer;
    // Stays null when no spelling index is configured or the configured path does not exist.
    private SpellChecker spellingChecker;
    // Only set by the injecting constructor.
    private Configuration configuration;
    // Caches obtained from the CacheService in the injecting constructor.
    private javax.cache.Cache<CandidateCacheKey, List> candidateCache;
    private javax.cache.Cache<Pair<String, String>, String[]> geneRecordFieldCache;
    private javax.cache.Cache<String, TFIDFUtils> tfidfCache;

    /**
     * Creates an instance around an existing mention index searcher and a scorer.
     * <p>
     * The searcher is registered in the static per-thread mention searcher cache under the calling thread.
     * No configuration, gene record index or service cache is set up, so {@link #getConfiguration()} returns
     * {@code null} for instances created this way and the global field weights are empty.
     *
     * @param mentionIndexSearcher the searcher to register for the current thread
     * @param scorer               the scorer returned by {@link #getScorer()}
     * @deprecated NOTE(review): no replacement is named in the code; presumably the injecting constructor is
     * meant to be used instead - confirm.
     */
    @Deprecated
    public LuceneCandidateRetrieval(IndexSearcher mentionIndexSearcher, Scorer scorer) {
        LuceneCandidateRetrieval.mentionIndexSearchers.put(Thread.currentThread(), mentionIndexSearcher);
        this.exactScorer = scorer;
        this.normalizer = new TermNormalizer();
        this.globalFieldWeights = Collections.emptyMap();
        // This constructor never uses the Lucene candidate cache.
        this.useLuceneCandidateCache = false;
    }

    /**
     * Creates a fully configured candidate retrieval component.
     * <p>
     * Opens the gene record index, the optional original-names index and the name centric index named in the
     * configuration, sets up the optional spell checker, obtains the candidate, gene record field and TF-IDF
     * caches from the cache service, creates the exact and approximate scorers, builds the n-gram analyzer and
     * reads the global field weights.
     *
     * @param config          the configuration providing index locations, scorer types and field weights
     * @param executorService the executor handed to the created index searchers; also stored in a static field
     * @param cacheService    the service providing the named caches
     * @throws GeneCandidateRetrievalException if a required index is not configured, a configured scorer type
     *                                         is unknown or an index cannot be opened
     */
    @Inject
    public LuceneCandidateRetrieval(Configuration config, ExecutorService executorService, CacheService cacheService) throws GeneCandidateRetrievalException {
        this.configuration = config;
        LuceneCandidateRetrieval.executorService = executorService;
        this.useLuceneCandidateCache = config.getBoolean(Configuration.USE_LUCENE_CANDIDATES_CACHE).orElse(false);

        // lucene gene record index
        String geneRecordIndex = config.getProperty(Configuration.GENE_RECORD_INDEX);
        if (geneRecordIndex == null) {
            if (!TEST_MODE)
                throw new GeneCandidateRetrievalException("geneRecordIndex index not specified in configuration file (critical).");
            log.warn("Gene record index not specified in configuration file. Attempts to use it will cause errors.");
        }

        // lucene gene record index holding the original (non-normalized) names; optional
        String geneRecordOriginalNamesIndex = config.getProperty(Configuration.GENE_RECORD_ORIGINAL_NAMES_INDEX);
        if (geneRecordOriginalNamesIndex == null) {
            log.warn("No value for property {} specified in the configuration. No original names will be available.", Configuration.GENE_RECORD_ORIGINAL_NAMES_INDEX);
        }

        // lucene mention index
        String mentionIndex = config.getProperty(Configuration.NAME_CENTRIC_INDEX);
        if (mentionIndex == null) {
            throw new GeneCandidateRetrievalException("name centric mention index not specified in configuration file (critical).");
        }

        // The caches must be available before the scorers are created: the TFIDF scorer type reads the TF-IDF
        // cache while it is being set up.
        candidateCache = cacheService.getCacheManager().getCache("candidates-cache");
        geneRecordFieldCache = cacheService.getCacheManager().getCache("generecord-field-cache");
        tfidfCache = cacheService.getCacheManager().getCache("tfidf-cache");

        try {
            synchronized (LuceneCandidateRetrieval.class) {
                int luceneConcurrencyLevel = Integer.parseInt((String) config.getOrDefault(Configuration.CONCURRENCY_LEVEL, "1"));
                log.info("Using Lucene concurrency level of {}. Note that concurrency is limited by the number of segments of the index.", luceneConcurrencyLevel);
            }

            // FSDirectory.open selects the directory implementation for the current platform by itself.
            // NOTE(review): the null checks below guard instance fields, which are always null at this point.
            synchronized (LuceneCandidateRetrieval.class) {
                if (geneRecordIndexSearcher == null) {
                    geneRecordIndexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(geneRecordIndex))), executorService);
                    log.info("Gene record index has {} segments", ((StandardDirectoryReader) geneRecordIndexSearcher.getIndexReader()).getSegmentInfos().size());
                }
            }

            if (geneRecordOriginalNamesIndex != null) {
                // Check the original-names index itself, not the gene record index.
                if (new File(geneRecordOriginalNamesIndex).exists()) {
                    synchronized (LuceneCandidateRetrieval.class) {
                        if (geneRecordOriginalNamesIndexSearcher == null) {
                            geneRecordOriginalNamesIndexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(geneRecordOriginalNamesIndex))), executorService);
                            log.info("Original gene names record names index has {} segments", ((StandardDirectoryReader) geneRecordOriginalNamesIndexSearcher.getIndexReader()).getSegmentInfos().size());
                        }
                    }
                } else {
                    log.warn("Original gene names record index {} does not exist. This index will not be available.", geneRecordOriginalNamesIndex);
                }
            }

            if (mentionIndex != null) {
                if (new File(mentionIndex).exists()) {
                    if (nameCentricIndexSearcher == null) {
                        nameCentricIndexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(mentionIndex))), executorService);
                        log.info("Name centric index has {} segments", ((StandardDirectoryReader) nameCentricIndexSearcher.getIndexReader()).getSegmentInfos().size());
                    }
                } else {
                    log.warn("Name centric index {} does not exist. This index will not be available.", mentionIndex);
                }
            }


            String spellingIndexPath = config.getProperty(Configuration.SPELLING_INDEX);
            if (spellingIndexPath != null) {
                File spellingIndex = new File(spellingIndexPath);
                if (spellingIndex.exists())
                    spellingChecker = new SpellChecker(FSDirectory.open(spellingIndex.toPath()));
            }
            if (spellingChecker == null)
                log.warn(
                        "Spelling index was not given or file does not exist. No spelling correction can be done. Specified spelling index: {}",
                        spellingIndexPath);

            // maxent model; must be known before the scorers are created because setScorerType reads it
            String configuredMaxEntModel = config.getProperty("maxent_model");
            if (configuredMaxEntModel != null) {
                this.maxEntModel = configuredMaxEntModel;
            }

            // scorer types
            String scorerType = config.getProperty(Configuration.EXACT_SCORER_TYPE);
            if (scorerType == null) {
                log.debug("No configuration value given for {}", Configuration.EXACT_SCORER_TYPE);
                exactScorer = setScorerType(LUCENE_SCORER);
            } else {
                exactScorer = setScorerType(Integer.parseInt(scorerType));
            }

            scorerType = config.getProperty(Configuration.APPROX_SCORER_TYPE);
            if (scorerType == null) {
                log.debug("No configuration value given for {}", Configuration.APPROX_SCORER_TYPE);
                approxScorer = setScorerType(LUCENE_SCORER);
            } else {
                approxScorer = setScorerType(Integer.parseInt(scorerType));
            }

            this.normalizer = new TermNormalizer();
        } catch (IOException e) {
            throw new GeneCandidateRetrievalException(e);
        }

        log.info("Exact scorer: {}", exactScorer);
        log.info("Approx scorer: {}", approxScorer);

        try {
            Map<String, String> ngramFilterSettings = new HashMap<>();
            ngramFilterSettings.put("minGramSize", "2");
            ngramFilterSettings.put("maxGramSize", "3");
            ngramAnalyzer = CustomAnalyzer.builder()
                    .withTokenizer("whitespace")
                    .addTokenFilter(NGramFilterFactory.class, ngramFilterSettings)
                    .build();
        } catch (IOException e) {
            // Best effort as before: the instance stays usable, the analyzer remains null.
            log.error("Could not create the n-gram analyzer. The analyzer will not be available.", e);
        }

        globalFieldWeights = new HashMap<>();
        config.getDouble(Configuration.dot(Configuration.PREFIX_CANDIDATE_RETRIEVAL, Configuration.PARAM_DISMAX_TIE_BREAKER)).ifPresent(d -> globalFieldWeights.put(Configuration.PARAM_DISMAX_TIE_BREAKER, (float) d));
        for (String field : GeneRecordQueryGenerator.ALL_FIELDS) {
            OptionalDouble fieldWeight = config.getDouble(Configuration.dot(Configuration.PREFIX_CANDIDATE_RETRIEVAL, field));
            fieldWeight.ifPresent(d -> globalFieldWeights.put(field, (float) d));
        }
        // NOTE(review): the "_exact" weight is stored under the plain field name and therefore replaces a weight
        // set by the loop above for the same field - confirm that this is intended.
        for (String field : GeneRecordQueryGenerator.SYNONYM_FIELDS) {
            OptionalDouble fieldWeight = config.getDouble(Configuration.dot(Configuration.PREFIX_CANDIDATE_RETRIEVAL, field + "_exact"));
            fieldWeight.ifPresent(d -> globalFieldWeights.put(field, (float) d));
        }
    }

    /**
     * @return the shared, mutable counter instance for the total cache read time (not a copy)
     */
    public static AtomicLong getTotalCacheGettime() {
        return totalCacheGettime;
    }

    /**
     * @return the shared, mutable counter instance for the total gene record field loading time (not a copy)
     */
    public static AtomicLong getTotalGeneRecordFieldLoadingTime() {
        return totalGeneRecordFieldLoadingTime;
    }

    /**
     * @return the shared, mutable counter instance for the total cache write time (not a copy)
     */
    public static AtomicLong getTotalCachePuttime() {
        return totalCachePuttime;
    }

    /**
     * @return the shared, mutable counter instance accumulating the time spent in Lucene queries, measured
     * with {@link System#nanoTime()}
     */
    public static AtomicLong getTotalLuceneQueryTime() {
        return totalLuceneQueryTime;
    }

    /**
     * @return the shared, mutable counter instance for cache misses (not a copy)
     */
    public static AtomicLong getCacheMisses() {
        return cacheMisses;
    }

    /**
     * @return the shared, mutable counter instance for cache hits (not a copy)
     */
    public static AtomicLong getCacheHits() {
        return cacheHits;
    }

    /**
     * Creates a TF-IDF scorer whose statistics come from the synonym fields of the gene record index.
     * <p>
     * The statistics are looked up in the TF-IDF cache under a key built from the configured gene record index
     * and the field names. They are only learned from the index when the cache has no entry for that key.
     *
     * @return a new scorer backed by the cached or freshly learned statistics
     */
    public TFIDFScorer getTFIDFOnGeneRecordNames() {
        final String[] fields = GeneRecordQueryGenerator.SYNONYM_FIELDS;
        final String key = configuration.getProperty(Configuration.GENE_RECORD_INDEX) + " " + Arrays.toString(fields);
        TFIDFUtils statistics;
        // Lookup, learning and storing happen under one lock on the cache object.
        synchronized (tfidfCache) {
            final TFIDFUtils cached = tfidfCache.get(key);
            if (cached != null) {
                statistics = cached;
            } else {
                statistics = new TFIDFUtils();
                statistics.learnFromLuceneIndex(geneRecordIndexSearcher.getIndexReader(), fields);
                tfidfCache.put(key, statistics);
            }
        }
        return new TFIDFScorer(statistics);
    }

    /**
     * Creates a TF-IDF scorer whose statistics come from the lookup synonym field of the name centric index.
     * <p>
     * The statistics are looked up in the TF-IDF cache under a key built from the configured name centric index
     * and the field name. They are only learned from the index when the cache has no entry for that key.
     *
     * @return a new scorer backed by the cached or freshly learned statistics
     */
    public TFIDFScorer getTFIDFOnGeneSynonyms() {
        final String[] fields = {SynonymIndexFieldNames.LOOKUP_SYN_FIELD};
        final String key = configuration.getProperty(Configuration.NAME_CENTRIC_INDEX) + " " + Arrays.toString(fields);
        TFIDFUtils statistics;
        // Lookup, learning and storing happen under one lock on the cache object.
        synchronized (tfidfCache) {
            final TFIDFUtils cached = tfidfCache.get(key);
            if (cached != null) {
                statistics = cached;
            } else {
                statistics = new TFIDFUtils();
                statistics.learnFromLuceneIndex(nameCentricIndexSearcher.getIndexReader(), fields);
                tfidfCache.put(key, statistics);
            }
        }
        return new TFIDFScorer(statistics);
    }

    /**
     * @return the gene record index searcher of this instance; {@code null} if the instance was created with the
     * deprecated constructor, which does not open a gene record index
     */
    private IndexSearcher getGeneRecordIndexSearcher() {
        // Former lazily created, static searcher; kept for reference.
//        if (geneRecordIndexSearcherInstance == null)
//            geneRecordIndexSearcherInstance = new IndexSearcher(geneRecordIndexReader, executorService);
//        return geneRecordIndexSearcherInstance;
        return geneRecordIndexSearcher;
    }

    /**
     * @return the configuration this instance was created with, or {@code null} if the deprecated constructor was used
     */
    public Configuration getConfiguration() {
        return configuration;
    }

    /**
     * @return the term normalizer passed to the gene mentions this class creates from plain strings
     */
    public TermNormalizer getNormalizer() {
        return normalizer;
    }

    /**
     * @param normalizer the term normalizer to use for gene mentions created from plain strings
     */
    public void setNormalizer(TermNormalizer normalizer) {
        this.normalizer = normalizer;
    }

    /**
     * @return the exact scorer; the approximate scorer is not exposed by this method
     */
    public Scorer getScorer() {
        return exactScorer;
    }

    /**
     * @return the spell checker, or {@code null} if no usable spelling index was configured
     */
    public SpellChecker getSpellingChecker() {
        return spellingChecker;
    }

    /**
     * Creates the scorer that belongs to the given scorer type constant.
     * <p>
     * Despite its name, this method does not change any state of this instance; the new scorer is only returned.
     *
     * @param type one of the scorer type constants of this class, e.g. {@link #LUCENE_SCORER}
     * @return a new scorer of the requested type
     * @throws GeneCandidateRetrievalException if the type is not one of the known constants
     */
    public Scorer setScorerType(int type) throws GeneCandidateRetrievalException {
        switch (type) {
            case SIMPLE_SCORER:
                return new SimpleScorer();
            case TOKEN_JAROWINKLER_SCORER:
                return new TokenJaroSimilarityScorer();
            case MAXENT_SCORER:
                if (maxEntModel.equals(MAXENT_SCORER_MODEL)) {
                    // The default model is shipped as a class path resource.
                    InputStream modelStream = this.getClass().getResourceAsStream(MAXENT_SCORER_MODEL);
                    return new MaxEntScorer(modelStream);
                }
                // A non-default model is read from the file system.
                return new MaxEntScorer(new File(maxEntModel));
            case JAROWINKLER_SCORER:
                return new JaroWinklerScorer();
            case LUCENE_SCORER:
                return new LuceneScorer();
            case LEVENSHTEIN_SCORER:
                return new LevenshteinScorer();
            case TFIDF:
                return getTFIDFOnGeneRecordNames();
            default:
                throw new GeneCandidateRetrievalException("Unknown mention scorer type: " + type);
        }
    }

    /**
     * Describes the exact scorer.
     *
     * @return the scorer's own info string, or a fixed Lucene score description when no exact scorer is set
     */
    public String getScorerInfo() {
        return exactScorer != null ? exactScorer.info() : "Lucene Score (unnormalized)";
    }

    /**
     * Returns the type constant of the exact scorer.
     * <p>
     * The deprecated constructor accepts a {@code null} scorer. {@link #getScorerInfo()} reports the plain Lucene
     * score in that case, so this method reports {@link #LUCENE_SCORER} instead of failing with a
     * {@link NullPointerException}.
     *
     * @return the exact scorer's type, or {@link #LUCENE_SCORER} when no exact scorer is set
     */
    public int getScorerType() {
        if (exactScorer == null) {
            return LUCENE_SCORER;
        }
        return exactScorer.getScorerType();
    }

    /**
     * Retrieves candidates for a plain search term by wrapping it into a {@link GeneMention} that uses this
     * instance's normalizer.
     *
     * @param originalSearchTerm the gene name as it appears in text
     * @param queryGenerator     the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(String originalSearchTerm, QueryGenerator queryGenerator) {
        return getCandidates(new GeneMention(originalSearchTerm, normalizer), queryGenerator);
    }

    /**
     * Retrieves candidates for the gene mention, restricted to the taxonomy IDs the mention itself carries.
     *
     * @param geneMention    the mention to find candidates for
     * @param queryGenerator the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, QueryGenerator queryGenerator) {
        return getCandidates(geneMention, geneMention.getTaxonomyIds(), queryGenerator);
    }

    /**
     * Retrieves candidates for the gene mention without a gene ID filter.
     *
     * @param geneMention    the mention to find candidates for
     * @param organisms      the taxonomy IDs to restrict the search to; {@code null} is treated like an empty collection
     * @param queryGenerator the generator creating the index query
     * @return the retrieved candidates
     */
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> organisms, QueryGenerator queryGenerator) {
        final Collection<String> taxonomyIds;
        if (organisms == null) {
            taxonomyIds = Collections.emptyList();
        } else {
            taxonomyIds = organisms;
        }
        return getCandidates(geneMention, null, taxonomyIds, queryGenerator);
    }

    /**
     * Retrieves candidates with hit fields loaded and without additional parameters.
     *
     * @param geneMention    the mention to find candidates for
     * @param geneIdsFilter  gene IDs to set as filter on the query key; may be {@code null}
     * @param organisms      the taxonomy IDs to restrict the search to; may be {@code null}
     * @param queryGenerator the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(GeneMention
                                              geneMention, Collection<String> geneIdsFilter, Collection<String> organisms, QueryGenerator queryGenerator) {
        return getCandidates(geneMention, geneIdsFilter, organisms, true, null, queryGenerator);
    }

    /**
     * Retrieves the candidates for a gene mention. All other {@code getCandidates} overloads end up here.
     * <p>
     * A {@link CandidateCacheKey} is built from the mention's gene name, the query generator and the optional
     * settings. Without organisms, the index is queried once. With organisms, the index is queried once per
     * taxonomy ID and all hits are collected into one list.
     *
     * @param geneMention    the mention to find candidates for
     * @param geneIdsFilter  gene IDs to set as filter on the key; ignored when {@code null}
     * @param organisms      the taxonomy IDs to query for; {@code null} or empty means no taxonomy restriction
     * @param loadFields     passed to the key via {@code setLoadSynHitFields}
     * @param parameters     optional parameters; field weights are only taken from them when the query field
     *                       weights flag is set; may be {@code null}
     * @param queryGenerator the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> geneIdsFilter, Collection<String> organisms, boolean loadFields, Parameters parameters, QueryGenerator queryGenerator) {
        List<SynHit> hits = new ArrayList<>();
        CandidateCacheKey key = new CandidateCacheKey(geneMention.getGeneName());
        key.setLoadSynHitFields(loadFields);
        key.setQueryGenerator(queryGenerator);
        if (parameters != null && parameters.getBoolean(Configuration.dot(Configuration.PREFIX_CANDIDATE_RETRIEVAL, Configuration.PARAM_USE_QUERY_FIELD_WEIGHTS)))
            key.setFieldWeights(getFieldWeightsFromParameters(parameters));
        if (queryGenerator instanceof GeneRecordQueryGenerator && ((GeneRecordQueryGenerator) queryGenerator).isUseContextGenesAsRelevanceSignal())
            key.setContextNames(geneMention.getContextGeneNames().collect(Collectors.toSet()));
        if (geneIdsFilter != null)
            key.setGeneIdsFilter(geneIdsFilter);
        if (organisms == null || organisms.isEmpty()) {
            hits = getCandidatesFromIndex(key);
            if (log.isTraceEnabled()) {
                int geneBegin = geneMention.getOffsets() != null ? geneMention.getBegin() : -1;
                int geneEnd = geneMention.getOffsets() != null ? geneMention.getEnd() : -1;
                log.trace("Returning {} candidates for gene mention {}[{}-{}]", hits.size(), key.getGeneName().getText(), geneBegin, geneEnd);
            }
        }
        if (organisms != null) {
            for (String taxonomyId : organisms) {
                // NOTE(review): the same key instance is changed for every taxonomy ID; this is only safe if
                // getCandidatesFromIndex does not keep a reference to it - confirm.
                key.setTaxId(taxonomyId);
                hits.addAll(getCandidatesFromIndex(key));

                if (log.isTraceEnabled()) {
                    int begin = -1;
                    int end = -1;
                    if (geneMention.getOffsets() != null) {
                        begin = geneMention.getBegin();
                        end = geneMention.getEnd();
                    }
                    // Log the taxonomy ID of this iteration, not the whole organisms collection. The hit count
                    // is the running total over all taxonomy IDs processed so far.
                    log.trace("Returning {} candidates for gene mention {}[{}-{}] for taxonomy ID {}",
                            hits.size(), key.getGeneName().getText(), begin, end, taxonomyId);
                }
            }
        }
        // Sorting the hits by exact match and matched field is currently disabled:
//        boolean sortByMatchingField = parameters != null && parameters.getBoolean(Configuration.dot(Configuration.PREFIX_CANDIDATE_RETRIEVAL, Configuration.PARAM_SORT_CANDIDATES_BY_MATCHED_FIELD), false);
//        Collections.sort(hits, GeneRecordHit.getNormalizedExactMatchThenLuceneScoreComparator(geneMention.getNormalizedText(), sortByMatchingField));
        // Read the non-final static field once so that it cannot become null between the check and the lock.
        final Set<String> accumulationSet = UNIT_TEST_GENE_ID_ACCUMULATION_SET;
        if (accumulationSet != null) {
            synchronized (accumulationSet) {
                hits.stream().map(SynHit::getId).forEach(accumulationSet::add);
            }
        }
        return hits;
    }

    /**
     * Retrieves candidates for a plain gene mention text, restricted by gene IDs and organisms.
     *
     * @param geneMentionText the gene name as it appears in text
     * @param geneIdsFilter   gene IDs to filter for; may be {@code null}
     * @param organism        the taxonomy IDs to restrict the search to; may be {@code null}
     * @param queryGenerator  the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, Collection<String> geneIdsFilter, Collection<String> organism, QueryGenerator queryGenerator) {
        return getCandidates(new GeneMention(geneMentionText, normalizer), geneIdsFilter, organism, queryGenerator);
    }

    /**
     * Retrieves candidates for a plain gene mention text and lets the caller decide whether hit fields are loaded.
     *
     * @param geneMentionText the gene name as it appears in text
     * @param geneIdsFilter   gene IDs to filter for; may be {@code null}
     * @param organism        the taxonomy IDs to restrict the search to; may be {@code null}
     * @param loadFields      passed on to the retrieval as the field loading flag
     * @param queryGenerator  the generator creating the index query
     * @return the retrieved candidates
     */
    public List<SynHit> getCandidates(String geneMentionText, Collection<String> geneIdsFilter, Collection<String> organism,
                                      boolean loadFields, QueryGenerator queryGenerator) {
        GeneMention geneMention = new GeneMention(geneMentionText, normalizer);
        // Hand loadFields through. The flag used to be dropped here, so fields were always loaded.
        return getCandidates(geneMention, geneIdsFilter, organism, loadFields, null, queryGenerator);
    }

    /**
     * Queries the gene record index directly and adds the elapsed time to the total Lucene query time counter.
     * The counter is not updated when the query throws.
     *
     * @param key the key describing the query
     * @return the hits found in the gene record index
     * @throws IOException                 declared for the underlying index query
     * @throws BooleanQuery.TooManyClauses declared for the underlying index query
     */
    private List<SynHit> getCandidatesFromIndexWithoutCache(CandidateCacheKey key)
            throws IOException, BooleanQuery.TooManyClauses {
        final long start = System.nanoTime();
        // Combining hits is only needed when not using the gene records index, so the hits are returned as is.
        final List<SynHit> recordHits = getCandidatesFromRecordIndex(key);
        totalLuceneQueryTime.addAndGet(System.nanoTime() - start);
        return recordHits;
    }

    /**
     * Retrieves candidates for the gene mention for at most one organism.
     *
     * @param geneMention    the mention to find candidates for
     * @param organism       the taxonomy ID to restrict the search to; {@code null} means no restriction
     * @param queryGenerator the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, String organism, QueryGenerator queryGenerator) {
        final Collection<String> taxonomyIds;
        if (organism == null) {
            taxonomyIds = Collections.emptyList();
        } else {
            taxonomyIds = Arrays.asList(organism);
        }
        return getCandidates(geneMention, taxonomyIds, queryGenerator);
    }

    /**
     * Retrieves candidates for a plain gene mention text for at most one organism.
     *
     * @param geneMentionText the gene name as it appears in text
     * @param organism        the taxonomy ID to restrict the search to; {@code null} means no restriction
     * @param queryGenerator  the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, String organism, QueryGenerator queryGenerator) {
        final GeneMention mention = new GeneMention(geneMentionText, normalizer);
        final Collection<String> taxonomyIds;
        if (organism == null) {
            taxonomyIds = Collections.emptyList();
        } else {
            taxonomyIds = Arrays.asList(organism);
        }
        return getCandidates(mention, taxonomyIds, queryGenerator);
    }

    /**
     * Retrieves candidates for a plain gene mention text, restricted to the given organisms.
     *
     * @param geneMentionText the gene name as it appears in text; wrapped into a {@link GeneMention} using this
     *                        instance's normalizer
     * @param organism        the taxonomy IDs to restrict the search to; may be {@code null}
     * @param queryGenerator  the generator creating the index query
     * @return the retrieved candidates
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, Collection<String> organism, QueryGenerator
            queryGenerator) {
        return getCandidates(new GeneMention(geneMentionText, normalizer), organism, queryGenerator);
    }

    /**
     * Looks up the gene record for a gene ID and returns the taxonomy ID of that record.
     *
     * @param geneId a gene ID to look up in the index
     * @return the taxonomy ID of a record found for the gene ID, or the empty string if no record was found
     */
    @Override
    public String mapGeneIdToTaxId(String geneId) {
        final Set<GeneRecordHit> records = getGeneRecords(List.of(geneId));
        final Optional<GeneRecordHit> anyRecord = records == null ? Optional.empty() : records.stream().findAny();
        if (anyRecord.isPresent()) {
            return anyRecord.get().getTaxId();
        }
        log.warn("GeneID: " + geneId + " was not found in the index.");
        return "";
    }

//    public static Set<String> notInIndex = new HashSet<>();


    /**
     * Returns {@link GeneRecordHit} instances with all fields loaded, looked up in the gene record index.
     *
     * @param ids IDs of the gene records to return.
     * @return The records for the given IDs.
     */
    public List<SynHit> getIndexRecords(Collection<String> ids) {
        return getIndexRecords(ids, getGeneRecordIndexSearcher());
    }

    /**
     * Retrieves the records for the given IDs from the original-names index without
     * a gene name to match the records against.
     *
     * @param ids IDs of the gene records to return.
     * @return The records for the given IDs.
     */
    public List<SynHit> getOriginalNamesIndexRecords(Collection<String> ids) {
        final GeneName noGeneName = null;
        return getOriginalNamesIndexRecords(ids, noGeneName);
    }

    /**
     * Retrieves the records for the given IDs from the original-names index. The (non-normalized) text
     * of the gene name is the string handed on for comparison.
     *
     * @param ids      IDs of the gene records to return.
     * @param geneName The gene name to associate with the records; may be {@code null}.
     * @return The records for the given IDs.
     */
    public List<SynHit> getOriginalNamesIndexRecords(Collection<String> ids, GeneName geneName) {
        final Function<GeneName, String> originalText = GeneName::getText;
        return getIndexRecords(ids, geneName, originalText, getGeneRecordOriginalNamesIndexSearcher());
    }

    /**
     * Retrieves the records for the given IDs with the given searcher, without a gene name to match
     * the records against. The normalized gene name text is the comparison string function handed on.
     *
     * @param ids           IDs of the gene records to return.
     * @param indexSearcher The index searcher to use.
     * @return The records for the given IDs.
     */
    public List<SynHit> getIndexRecords(Collection<String> ids, IndexSearcher indexSearcher) {
        final GeneName noGeneName = null;
        final Function<GeneName, String> normalizedText = GeneName::getNormalizedText;
        return getIndexRecords(ids, noGeneName, normalizedText, indexSearcher);
    }

    /**
     * Looks up each of the given gene IDs individually and returns a fully loaded record hit for every ID found.
     * IDs without a matching index document are silently left out of the result.
     *
     * @param ids           The gene IDs of the index items to retrieve.
     * @param geneName      The gene name to add as the mapped mention and to use to find the synonym matching the
     *                      gene name best. If {@code null}, a placeholder gene name is used and the result is not sorted.
     * @param geneNameFunc  The function to be applied to {@code geneName} in order to retrieve a string for comparison.
     * @param indexSearcher The index searcher to use.
     * @return The found SynHits matching the input IDs.
     * @throws GeneExpRuntimeException If reading from the index fails.
     */
    public List<SynHit> getIndexRecords(Collection<String> ids, GeneName geneName, Function<GeneName, String> geneNameFunc, IndexSearcher indexSearcher) {
        try {
            List<SynHit> entries = new ArrayList<>(ids.size());
            for (String id : ids) {
                Query query = new BooleanQuery.Builder()
                        .add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, id)), Occur.FILTER)
                        .build();
                TopDocs result = indexSearcher.search(query, 1);
                if (result.totalHits.value <= 0)
                    continue;
                ScoreDoc scoreDoc = result.scoreDocs[0];
                Document d = indexSearcher.doc(scoreDoc.doc);
                GeneRecordHit m = getRecordHit(true, geneName != null ? geneName : new GeneName("<retrieved by id>", normalizer), geneNameFunc, scoreDoc, d);
                // Set the score to some value > 0 so it won't get dropped in the Lucene score ratio filtering in the DypsisCandidateRanker.
                m.setLuceneScore(1);
                entries.add(m);
            }
            if (geneName != null)
                entries.sort(GeneRecordHit.getNormalizedExactMatchThenLuceneScoreComparator(geneNameFunc.apply(geneName), false));
            if (UNIT_TEST_GENE_ID_ACCUMULATION_SET != null) {
                // Same locking discipline as getGeneRecords(): the accumulation set is shared between callers.
                synchronized (UNIT_TEST_GENE_ID_ACCUMULATION_SET) {
                    entries.forEach(e -> UNIT_TEST_GENE_ID_ACCUMULATION_SET.add(e.getId()));
                }
            }
            return entries;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Joins all given synonyms with single spaces into one gene name and retrieves at most 1000 candidates
     * for it, restricted to the given gene IDs. The SynHit fields are not loaded.
     *
     * @param allSynonyms The synonyms to join into one query name.
     * @param ids         The gene IDs to restrict the hits to.
     * @param qg          The query generator to use.
     * @return The hits obtained for the joined name.
     */
    @Override
    public List<SynHit> scoreIdsByBoWSynonyms(Collection<String> allSynonyms, Set<String> ids, QueryGenerator qg) {
        final GeneName bagOfWordsName = new GeneName(String.join(" ", allSynonyms), normalizer);
        final CandidateCacheKey bowKey = new CandidateCacheKey(bagOfWordsName, null);
        bowKey.setGeneIdsFilter(new HashSet<>(ids));
        bowKey.setQueryGenerator(qg);
        bowKey.setMaxHits(1000);
        bowKey.setLoadSynHitFields(false);
        return getCandidatesFromIndex(bowKey);
    }

    /**
     * <p>Queries the index with every gene name in {@code ids2entities}, restricted to the IDs that are the keys
     * of that map, and sums up the Lucene scores of the hits per hit ID.</p>
     * <p>If {@code synhit2namesFunc} is given, the names it extracts from each hit are additionally collected
     * per hit ID: for query type {@code "exact"} those equal to the normalized query name, for query type
     * {@code "apprx"} those that are not one of the normalized gene names of the hit ID but share at least one
     * whitespace-separated token with them. For {@code "apprx"}, IDs whose collected name set stayed empty
     * are removed from both result maps.</p>
     *
     * @param queryType        {@code "exact"} or {@code "apprx"}; decides which names are collected.
     * @param ids2entities     The gene IDs mapped to the gene names to query with.
     * @param synhit2namesFunc Extracts the names to check from a hit; if {@code null}, no names are collected
     *                         and the hit fields are not loaded.
     * @param qg               The query generator to use.
     * @return The pair of summed scores per ID and collected names per ID.
     */
    public Pair<Map<String, Double>, Map<String, Set<String>>> scoreSynonymsRecordIndex(
            String queryType, Map<String, Collection<GeneName>> ids2entities,
            Function<GeneRecordHit, String[]> synhit2namesFunc, QueryGenerator qg) {
        final Map<String, Double> scores = new HashMap<>();
        final Map<String, Set<String>> ids2synonyms = new HashMap<>();
        final Map<String, Set<String>> tokensById = ids2entities.entrySet().stream()
                .collect(Collectors.toMap(Map.Entry::getKey,
                        e -> e.getValue().stream()
                                .map(GeneName::getNormalizedText)
                                .flatMap(text -> Arrays.stream(text.split("\\s+")))
                                .collect(Collectors.toSet())));
        final Map<String, Set<String>> namesById = ids2entities.entrySet().stream()
                .collect(Collectors.toMap(Map.Entry::getKey,
                        e -> e.getValue().stream()
                                .map(GeneName::getNormalizedText)
                                .collect(Collectors.toSet())));
        // Stateless check, so one instance serves all queries.
        final BiFunction<String, Set<String>, Boolean> sharesToken =
                (candidate, tokens) -> Arrays.stream(candidate.split("\\s+")).anyMatch(tokens::contains);

        for (Collection<GeneName> entityNames : ids2entities.values()) {
            for (GeneName synonym : entityNames) {
                final CandidateCacheKey cacheKey = new CandidateCacheKey(synonym, null);
                cacheKey.setGeneIdsFilter(new HashSet<>(ids2entities.keySet()));
                cacheKey.setQueryGenerator(qg);
                cacheKey.setMaxHits(1000);
                cacheKey.setLoadSynHitFields(synhit2namesFunc != null);
                final String queryName = cacheKey.getGeneName().getNormalizedText();
                for (SynHit hit : getCandidatesFromIndex(cacheKey)) {
                    final String hitId = hit.getId();
                    scores.merge(hitId, (double) hit.getLuceneScore(), Double::sum);
                    if (synhit2namesFunc == null)
                        continue;
                    final Set<String> collected = ids2synonyms.computeIfAbsent(hitId, k -> new HashSet<>());
                    final String[] hitNames = synhit2namesFunc.apply((GeneRecordHit) hit);
                    if (hitNames == null)
                        continue;
                    for (String hitName : hitNames) {
                        final boolean equalsQueryName = queryName.equals(hitName);
                        if (hitName == null)
                            continue;
                        final boolean accepted;
                        if (queryType.equals("exact") && equalsQueryName) {
                            accepted = true;
                        } else {
                            accepted = queryType.equals("apprx")
                                    && !namesById.get(hitId).contains(hitName)
                                    && sharesToken.apply(hitName, tokensById.get(hitId));
                        }
                        if (accepted)
                            collected.add(hitName);
                    }
                }
            }
        }
        // Approximate queries may also hit exact matches "by accident", which cannot be remedied here.
        // What can be detected is that no approximate name at all was found for an ID; then the ID is dropped.
        // This does not help when an exact AND an approximate name were hit: the score then reflects both.
        if (queryType.equals("apprx")) {
            for (String id : ids2entities.keySet()) {
                final Set<String> collected = ids2synonyms.get(id);
                if (collected == null || !collected.isEmpty())
                    continue;
                ids2synonyms.remove(id);
                scores.remove(id);
            }
        }
        return new ImmutablePair<>(scores, ids2synonyms);
    }

    /**
     * Retrieves candidates for the gene mention by delegating to the six-argument overload, passing an
     * empty list as its second argument and {@code true} as its fourth.
     *
     * @param gm             The gene mention to find candidates for.
     * @param taxId          The taxonomy IDs to pass on.
     * @param parameters     The parameters to pass on.
     * @param queryGenerator The query generator to use.
     * @return The result of the six-argument overload.
     */
    @Override
    public List<SynHit> getCandidates(GeneMention gm, Collection<String> taxId, Parameters parameters,
                                      QueryGenerator queryGenerator) {
        return getCandidates(
                gm,
                Collections.emptyList(),
                taxId,
                true,
                parameters,
                queryGenerator);
    }

    /**
     * Builds the weight map for the dismax tie breaker, for all fields in
     * {@code GeneRecordQueryGenerator.ALL_FIELDS} and for the {@code _exact} variants of the fields in
     * {@code GeneRecordQueryGenerator.SYNONYM_FIELDS}. For each of them, a value in {@code parameterMap}
     * takes precedence over {@code globalFieldWeights}, which takes precedence over the built-in default
     * (0.3 for the tie breaker, 1 for the fields).
     *
     * @param parameterMap The parameters, keyed with the candidate retrieval prefix; may be {@code null}.
     * @return The resolved weights keyed by the unprefixed names.
     * @throws NumberFormatException If a non-numeric parameter value cannot be parsed as a float.
     */
    private Map<String, Float> getFieldWeightsFromParameters(Map<String, Object> parameterMap) {
        final Map<String, Object> parameters = parameterMap != null ? parameterMap : Collections.emptyMap();
        final Map<String, Float> fieldWeights = new HashMap<>();
        fieldWeights.put(Configuration.PARAM_DISMAX_TIE_BREAKER, resolveFieldWeight(parameters, Configuration.PARAM_DISMAX_TIE_BREAKER, 0.3f));
        for (String field : GeneRecordQueryGenerator.ALL_FIELDS) {
            fieldWeights.put(field, resolveFieldWeight(parameters, field, 1f));
        }
        for (String field : GeneRecordQueryGenerator.SYNONYM_FIELDS) {
            String exactFieldName = field + "_exact";
            fieldWeights.put(exactFieldName, resolveFieldWeight(parameters, exactFieldName, 1f));
        }
        return fieldWeights;
    }

    /**
     * Resolves a single weight: the parameter value if present, else the global weight, else {@code fallback}.
     * Parameter values may be numbers or strings holding a float; the map is declared with {@code Object}
     * values, so a blind cast to {@code String} would fail for numeric values.
     */
    private float resolveFieldWeight(Map<String, Object> parameters, String name, float fallback) {
        Object configured = parameters.get(Configuration.dot(Configuration.PREFIX_CANDIDATE_RETRIEVAL, name));
        if (configured == null)
            return globalFieldWeights.getOrDefault(name, fallback);
        if (configured instanceof Number)
            return ((Number) configured).floatValue();
        return Float.parseFloat(configured.toString());
    }


    /**
     * Closes the index readers of the gene record searcher and of the original-names searcher.
     * Both readers are attempted to be closed even if closing the first one fails; the original-names
     * searcher reference is cleared in any case.
     *
     * @throws GeneExpRuntimeException If closing a reader fails. When both fail, the second failure is
     *                                 attached to the first one as a suppressed exception.
     */
    public void close() {
        IOException failure = null;
        try {
            if (geneRecordIndexSearcher != null && geneRecordIndexSearcher.getIndexReader() != null)
                geneRecordIndexSearcher.getIndexReader().close();
        } catch (IOException e) {
            // Remember the failure but still release the second reader below.
            failure = e;
        }
        try {
            if (geneRecordOriginalNamesIndexSearcher != null && geneRecordOriginalNamesIndexSearcher.getIndexReader() != null)
                geneRecordOriginalNamesIndexSearcher.getIndexReader().close();
        } catch (IOException e) {
            if (failure == null)
                failure = e;
            else
                failure.addSuppressed(e);
        } finally {
            geneRecordOriginalNamesIndexSearcher = null;
        }
        if (failure != null)
            throw new GeneExpRuntimeException(failure);
    }

    /**
     * Retrieves candidates for the gene name of the mention, restricted by a term filter on the entity type
     * field to the {@code FAMILYNAME} type. The SynHit fields are loaded.
     *
     * @param gm             The gene mention whose gene name is queried.
     * @param queryGenerator The query generator to use.
     * @return The hits found for the gene name.
     */
    @Override
    public List<SynHit> getFamilyNames(GeneMention gm, QueryGenerator queryGenerator) {
        final CandidateCacheKey familyKey = new CandidateCacheKey(gm.getGeneName());
        familyKey.setLoadSynHitFields(true);
        final String familyType = GeneMention.SpecificType.FAMILYNAME.name();
        familyKey.setTermFilter(SynonymIndexFieldNames.ENTITY_TYPE, familyType);
        familyKey.setQueryGenerator(queryGenerator);
        return getCandidatesFromIndex(familyKey);
    }

    /**
     * This is the method that accesses the cache. This is important because before the SynHits are returned,
     * they must be cloned or changes on them will write back into the cache.
     * <p>
     * The cache get and put timers only cover the cache operations themselves; the time spent on the index
     * lookup after a cache miss is not included in either of them.
     * </p>
     *
     * @param key The cache key.
     * @return A new list that contains copies of the cached or freshly retrieved SynHits.
     * @throws GeneExpRuntimeException If the index lookup after a cache miss fails with an I/O error.
     */
    private List<SynHit> getCandidatesFromIndex(CandidateCacheKey key) {
        final long getStart = System.nanoTime();
        List<SynHit> synHits = useLuceneCandidateCache ? candidateCache.get(key) : null;
        // Recorded for hits and misses alike and before the index is consulted.
        totalCacheGettime.addAndGet(System.nanoTime() - getStart);
        if (synHits == null) {
            cacheMisses.addAndGet(1);
            try {
                synHits = getCandidatesFromIndexWithoutCache(key);
            } catch (IOException e) {
                throw new GeneExpRuntimeException(e);
            }
            if (useLuceneCandidateCache) {
                final long putStart = System.nanoTime();
                candidateCache.put(key, synHits);
                totalCachePuttime.addAndGet(System.nanoTime() - putStart);
            }
        } else {
            cacheHits.addAndGet(1);
        }
        return synHits.stream().map(SynHit::clone).collect(Collectors.toList());
    }

    /**
     * @return The searcher held in the {@code geneRecordOriginalNamesIndexSearcher} field, as is.
     */
    private IndexSearcher getGeneRecordOriginalNamesIndexSearcher() {
        return this.geneRecordOriginalNamesIndexSearcher;
    }

    /**
     * Runs the query generated from the key against the gene record index and returns the hits, sorted by
     * the exact-match-then-Lucene-score comparator for the normalized gene name.
     * <p>
     * If the key's gene ID filter holds more IDs than {@link BooleanQuery#getMaxClauseCount()}, the IDs are
     * split into batches and one query is run per batch (each collecting up to the key's maximum number of
     * hits). The batches are set on the key temporarily; the key's original gene ID filter is restored
     * before this method returns, so the key can safely be used as a cache key afterwards.
     * </p>
     *
     * @param key The key describing the query.
     * @return The hits of all batches.
     * @throws IOException              If reading from the index fails.
     * @throws IllegalArgumentException If Lucene reports too many boolean clauses for the query.
     */
    private List<SynHit> getCandidatesFromRecordIndex(CandidateCacheKey key) throws IOException {
        // Remember the caller's filter; the batching below replaces it on the key.
        final Collection<String> originalGeneIdsFilter = key.getGeneIdsFilter();
        try {
            List<SynHit> ret = new ArrayList<>();
            // In some cases, we had more than 1024 gene IDs. This caused an error in Lucene due to too many boolean clauses.
            // Thus, we split the IDs into batches if there are too many.
            final List<String> allGeneFilterIds = originalGeneIdsFilter instanceof List ? (List<String>) originalGeneIdsFilter : new ArrayList<>(originalGeneIdsFilter);
            int batchNum = -1;
            final int maxClauseCount = BooleanQuery.getMaxClauseCount();
            do {
                ++batchNum;
                final List<String> currentBatch = allGeneFilterIds.size() <= maxClauseCount ? allGeneFilterIds : allGeneFilterIds.subList(batchNum * maxClauseCount, Math.min(batchNum * maxClauseCount + maxClauseCount, allGeneFilterIds.size()));
                key.setGeneIdsFilter(currentBatch);

                Query query = key.generateQuery();

                TopScoreDocCollector resultsCollector = TopScoreDocCollector.create(key.getMaxHits(), key.getMaxHits());
                IndexSearcher indexSearcher = getGeneRecordIndexSearcher();
                indexSearcher.search(query, resultsCollector);
                TopDocs topDocs = resultsCollector.topDocs();
                boolean loadSynHitFields = key.isLoadSynHitFields();
                for (ScoreDoc doc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(doc.doc);
                    GeneRecordHit sh = getRecordHit(loadSynHitFields, key.getGeneName(), GeneName::getNormalizedText, doc, document);
                    ret.add(sh);
                }
            }
            while (batchNum * maxClauseCount + maxClauseCount < allGeneFilterIds.size());
            Collections.sort(ret, GeneRecordHit.getNormalizedExactMatchThenLuceneScoreComparator(key.getGeneName().getNormalizedText(), true));
            return ret;
        } catch (BooleanQuery.TooManyClauses e) {
            // NOTE(review): the message says no candidates are returned, but the exception is rethrown - confirm intent.
            log.warn("Got too many clauses exception from gene name \"{}\". Assuming that this is a tagging error and not returning any candidates.", key.getGeneName().getText());
            throw new IllegalArgumentException(e);
        } finally {
            // Undo the per-batch filter so that the key is left as the caller passed it in.
            key.setGeneIdsFilter(originalGeneIdsFilter);
        }
    }

    /**
     * Creates a {@link GeneRecordHit} from an index document.
     * <p>
     * The ID, symbol and Lucene score are always set. If {@code loadSynHitFields} is {@code true}, the
     * taxonomy ID and the name fields of the record are loaded as well and, if a gene name is given, the
     * record name matching the gene name best is determined (see
     * {@link #assignBestMatchingSynonym(GeneRecordHit, GeneName, Function, Document)}).
     * If no synonym was set that way, the symbol is used as the synonym.
     * </p>
     *
     * @param loadSynHitFields Whether to load the taxonomy ID and the name fields from the document.
     * @param geneName         The gene name the hit was retrieved for; may be {@code null}.
     * @param geneNameFunc     The function to be applied to the gene name and its alternatives in order to
     *                         retrieve the strings for comparison.
     * @param doc              The Lucene score doc of the hit.
     * @param document         The stored index document of the hit.
     * @return The record hit.
     */
    private GeneRecordHit getRecordHit(boolean loadSynHitFields, GeneName geneName, Function<GeneName, String> geneNameFunc, ScoreDoc doc, Document
            document) {
        String id = document.getField(SynonymIndexFieldNames.ID_FIELD).stringValue();
        String taxId = null;
        if (loadSynHitFields)
            taxId = document.getField(SynonymIndexFieldNames.TAX_ID_FIELD).stringValue();

        IndexableField symbolField = document.getField(SynonymIndexFieldNames.SYMBOL);
        String symbol = symbolField != null ? symbolField.stringValue() : "";
        GeneRecordHit sh = new GeneRecordHit(symbol, doc.score, id, "<no source specified>");
        sh.setMappedMention(geneName != null ? geneName.getText() : "none");
        sh.setMappedGeneName(geneName);
        sh.setLuceneScore(doc.score);
        if (taxId != null) {
            sh.setTaxIds(Collections.singletonList(taxId));
            sh.setTaxId(taxId);
        }
        if (loadSynHitFields) {
            long time = System.nanoTime();
            sh.setSymbol(symbol);
            Optional.ofNullable(document.getField(SynonymIndexFieldNames.SYMBOL_FROM_NOMCENCLATURE)).ifPresent(f -> sh.setNomenclature(f.stringValue()));
            Optional.ofNullable(document.getField(SynonymIndexFieldNames.CHROMOSOME)).ifPresent(f -> sh.setChromosome(f.stringValue()));
            Optional.ofNullable(document.getField(SynonymIndexFieldNames.MAPLOCATION)).ifPresent(f -> sh.setMapLocation(f.stringValue()));
            sh.setSynonyms(storedFieldValues(document, SynonymIndexFieldNames.SYNONYMS));
            sh.setFullNames(storedFieldValues(document, SynonymIndexFieldNames.FULL_NAMES));
            sh.setOtherDesignations(storedFieldValues(document, SynonymIndexFieldNames.OTHER_DESIGNATIONS));
            sh.setXrefs(storedFieldValues(document, SynonymIndexFieldNames.XREFS));
            sh.setUniprotNames(storedFieldValues(document, SynonymIndexFieldNames.UNIPROT_NAMES));
            sh.setBioThesaurusNames(storedFieldValues(document, SynonymIndexFieldNames.BIO_THESAURUS));
            Optional.ofNullable(document.getField(SynonymIndexFieldNames.ECNUMBER)).ifPresent(f -> sh.setEcNumber(f.stringValue()));
            if (geneName != null)
                assignBestMatchingSynonym(sh, geneName, geneNameFunc, document);
            totalGeneRecordFieldLoadingTime.addAndGet(System.nanoTime() - time);
        }
        // When no name of the record had any similarity to the input at all; then, some other field produced the
        // match.
        if (sh.getSynonym() == null)
            sh.setSynonym(symbol);
        return sh;
    }

    /**
     * Returns the string values of all fields with the given name in the document.
     */
    private String[] storedFieldValues(Document document, String fieldName) {
        return Arrays.stream(document.getFields(fieldName)).map(IndexableField::stringValue).toArray(String[]::new);
    }

    /**
     * Sets the synonym, synonym field and synonym similarity score of the hit from the synonym fields and
     * the EC number field of the document: an exact match with the gene name or one of its alternatives if
     * there is one, otherwise the value with the highest Jaro-Winkler score against the gene name.
     */
    private void assignBestMatchingSynonym(GeneRecordHit sh, GeneName geneName, Function<GeneName, String> geneNameFunc, Document document) {
        String name = geneNameFunc.apply(geneName);
        Set<String> names = new HashSet<>();
        names.add(name);
        geneName.getAlternatives().stream().map(geneNameFunc).forEach(names::add);

        List<String> nameFields = new ArrayList<>(Arrays.asList(GeneRecordQueryGenerator.SYNONYM_FIELDS));
        nameFields.add(SynonymIndexFieldNames.ECNUMBER);

        // First pass: the first record name that equals the gene name or one of its alternatives wins.
        for (String nameField : nameFields) {
            for (IndexableField f : document.getFields(nameField)) {
                String value = f.stringValue();
                if (names.contains(value)) {
                    sh.setExactMatch(true);
                    sh.setSynonym(value);
                    sh.setSynonymField(nameField);
                    sh.setSynonymSimilarityScore(1);
                    return;
                }
            }
        }

        // No exact match: score every record name against the primary name. The map is keyed by the record
        // name, so a name occurring in several fields keeps the field it was seen in last.
        Scorer jaroWinklerScorer = new JaroWinklerScorer();
        Map<String, Pair<String, Double>> jaroWinklerScores = new HashMap<>();
        for (String nameField : nameFields) {
            for (IndexableField f : document.getFields(nameField)) {
                String s = f.stringValue();
                double score = jaroWinklerScorer.getScore(s, name);
                jaroWinklerScores.put(s, new ImmutablePair<>(nameField, score));
            }
        }

        // Find the best matching approximate synonym. Only scores > 0 qualify.
        double bestApproxScore = 0;
        String bestApproxMatch = null;
        String bestMatchField = null;
        for (Map.Entry<String, Pair<String, Double>> scored : jaroWinklerScores.entrySet()) {
            double score = scored.getValue().getRight();
            if (score > bestApproxScore) {
                bestApproxScore = score;
                bestApproxMatch = scored.getKey();
                bestMatchField = scored.getValue().getLeft();
            }
        }
        sh.setSynonym(bestApproxMatch);
        sh.setSynonymField(bestMatchField);
        sh.setSynonymSimilarityScore(bestApproxScore);
    }

    /**
     * <p>Sets the full text / gene context fields (generif, summary, interactions) to instances of {@link GeneRecordHit}.</p>
     * <p>Note that this method accepts plain {@link SynHit} instances for convenience. But the actual objects
     * must be {@code GeneRecordHits}.</p>
     * <p>Values are taken from the gene record field cache where available. The remaining id/field combinations
     * are read from the index with a single query and put into the cache. If several of the given hits share
     * the same gene ID, all of them receive the values.</p>
     *
     * @param recordHits   The GeneRecordHits to set the full text / gene context values for.
     * @param fieldsToLoad The gene context fields to load and set. Must be included in {@link #fullTextFieldSetter}.
     * @throws GeneExpRuntimeException If reading from the index fails.
     */
    public void setFulltextFieldsToRecordHits(Collection<? extends SynHit> recordHits, Collection<String> fieldsToLoad) {
        // This map will hold those id/field combinations we have no cache hit for, each with ALL the hits
        // waiting for that value (several hits may refer to the same gene ID).
        Map<Pair<String, String>, List<GeneRecordHit>> pendingHits = new HashMap<>();
        // First check the cache
        for (SynHit sh : recordHits) {
            GeneRecordHit grh = (GeneRecordHit) sh;
            for (String fieldName : fieldsToLoad) {
                ImmutablePair<String, String> cacheKey = new ImmutablePair<>(sh.getId(), fieldName);
                String[] fieldValues = geneRecordFieldCache.get(cacheKey);
                if (fieldValues != null) {
                    fullTextFieldSetter.get(fieldName).accept(grh, fieldValues);
                } else {
                    pendingHits.computeIfAbsent(cacheKey, k -> new ArrayList<>()).add(grh);
                }
            }
        }
        if (pendingHits.isEmpty())
            return;

        try {
            // Now get the missing fields from the index. Each ID is added to the filter only once,
            // no matter for how many fields or hits it is missing.
            Set<String> missingIds = pendingHits.keySet().stream().map(Pair::getLeft).collect(Collectors.toSet());
            IndexSearcher indexSearcher = getGeneRecordIndexSearcher();
            BooleanQuery.Builder mainBuilder = new BooleanQuery.Builder();
            mainBuilder.add(new MatchAllDocsQuery(), Occur.MUST);
            BooleanQuery.Builder filterBuilder = new BooleanQuery.Builder();
            for (String id : missingIds)
                filterBuilder.add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, id)), Occur.SHOULD);
            mainBuilder.add(filterBuilder.build(), Occur.FILTER);

            TopDocs topdocs = indexSearcher.search(mainBuilder.build(), recordHits.size());
            for (ScoreDoc sd : topdocs.scoreDocs) {
                Document document = indexSearcher.doc(sd.doc);
                String id = document.getField(SynonymIndexFieldNames.ID_FIELD).stringValue();
                for (String fieldName : fieldsToLoad) {
                    ImmutablePair<String, String> cacheKey = new ImmutablePair<>(id, fieldName);
                    // The gene hits to be given their value for this field
                    List<GeneRecordHit> waitingHits = pendingHits.get(cacheKey);
                    if (waitingHits != null) {
                        String[] fieldValues = Arrays.stream(document.getFields(fieldName)).map(IndexableField::stringValue).toArray(String[]::new);
                        for (GeneRecordHit waitingHit : waitingHits)
                            fullTextFieldSetter.get(fieldName).accept(waitingHit, fieldValues);
                        // Put the retrieved values to the cache because they obviously weren't there before
                        geneRecordFieldCache.put(cacheKey, fieldValues);
                    }
                }
            }
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Retrieves the gene records with the given IDs from the gene record index, with all fields loaded
     * and without a gene name associated.
     * <p>
     * An empty ID collection yields an empty result without querying the index. Duplicate IDs are queried
     * only once, and the IDs are split into batches of at most {@link BooleanQuery#getMaxClauseCount()} IDs
     * so that large ID collections do not exceed Lucene's boolean clause limit.
     * </p>
     *
     * @param ids The IDs of the gene records to retrieve.
     * @return The records found for the IDs; IDs without an index document are left out.
     * @throws GeneExpRuntimeException If reading from the index fails.
     */
    public Set<GeneRecordHit> getGeneRecords(Collection<String> ids) {
        try {
            Set<GeneRecordHit> hits = new HashSet<>();
            // Lucene rejects searches for zero top hits, so there is nothing to ask the index for here.
            if (ids.isEmpty())
                return hits;
            List<String> distinctIds = new ArrayList<>(new LinkedHashSet<>(ids));
            IndexSearcher indexSearcher = getGeneRecordIndexSearcher();
            final int maxBatchSize = BooleanQuery.getMaxClauseCount();
            for (int from = 0; from < distinctIds.size(); ) {
                // Computed from the remaining size to stay clear of int overflow for very large clause limits.
                int batchSize = Math.min(maxBatchSize, distinctIds.size() - from);
                List<String> batch = distinctIds.subList(from, from + batchSize);
                from += batchSize;

                BooleanQuery.Builder mainBuilder = new BooleanQuery.Builder();
                mainBuilder.add(new MatchAllDocsQuery(), Occur.MUST);
                BooleanQuery.Builder filterBuilder = new BooleanQuery.Builder();
                for (String id : batch)
                    filterBuilder.add(new TermQuery(new Term(SynonymIndexFieldNames.ID_FIELD, id)), Occur.SHOULD);
                mainBuilder.add(filterBuilder.build(), Occur.FILTER);

                TopDocs topdocs = indexSearcher.search(mainBuilder.build(), batch.size());
                for (ScoreDoc sd : topdocs.scoreDocs) {
                    Document doc = indexSearcher.doc(sd.doc);
                    GeneRecordHit recordHit = getRecordHit(true, null, x -> "<none>", sd, doc);
                    hits.add(recordHit);
                }
            }
            if (UNIT_TEST_GENE_ID_ACCUMULATION_SET != null) {
                synchronized (UNIT_TEST_GENE_ID_ACCUMULATION_SET) {
                    hits.stream().map(SynHit::getId).forEach(UNIT_TEST_GENE_ID_ACCUMULATION_SET::add);
                }
            }
            return hits;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

//    /**
//     * <p>Tries to find gene candidates for <tt>gm</tt> that have one of the given taxonomy IDs.</p>
//     * <p>Returns those taxonomy IDs that were given in <tt>offeredTaxIds</tt> and for which a candidate with that
//     * tax Id was found where the synonym of the hit was similar enough to the actual gene name with respect to the
//     * Jaro-Winkler score. The threshold is a configuration parameter with key {@link de.julielab.speciesassignment.Configuration#PARAM_SPECIES_ASSIGNMENT_SINGULAR_SYN_SIM_THRESHOLD}.</p>
//     *
//     * @param gm            The gene mention.
//     * @param offeredTaxIds The possible tax IDs for the mention.
//     * @param parameterMap  The parameter for the Jaro-Winkler score similarity threshold.
//     * @return The tax IDs for which candidates could be found with respect to the given gene mention.
//     */
//    public Set<String> checkForCompatibleTaxonomyCandidates(GeneMention gm, Set<String> offeredTaxIds, Scorer
//            scorer, Parameters parameterMap) {
//        List<SynHit> candidates = getCandidates(gm, offeredTaxIds, LuceneCandidateRetrieval.GENE_RECORDS_CNF);
//        double synhitGenMentionSimilaryThreshold = parameterMap.getDouble(de.julielab.speciesassignment.Configuration.PARAM_SPECIES_ASSIGNMENT_SINGULAR_SYN_SIM_THRESHOLD);
//        Set<String> foundTaxIds = new HashSet<>();
//        for (SynHit sh : candidates) {
//            if (scorer.getScore(sh.getSynonym(), gm.getNormalizedText()) > synhitGenMentionSimilaryThreshold)
//                foundTaxIds.add(sh.getTaxId());
//        }
//        return Sets.intersection(offeredTaxIds, foundTaxIds).stream().collect(Collectors.toSet());
//    }
}
