/*
 * Copyright 2016 Global Crop Diversity Trust
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.genesys.taxonomy.checker;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.genesys.taxonomy.checker.TaxonomyChecker.Taxon;
import org.genesys.taxonomy.gringlobal.model.IGrinSpecies;

import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;

/**
 * {@link List} based in-memory "database". NOT THREAD-SAFE!
 */
@Slf4j
public class InMemoryTaxonomyDatabase implements TaxonomyDatabase {

	/** Percentage margin from best score. Matches with scores below bestScore * margin are ignored. */
	public static final double NONMATCH_MARGIN = 0.8;

	/** Specific-hybrid prefix; its length is what gets cut off a species name recognized as a specific hybrid. */
	private static final String HYBRID_SPECIES_PREFIX = "x ";
	/** Prefixes (lower- and upper-case) that mark a species name as a specific hybrid. */
	private static final String HYBRID_SPECIES_PREFIXES[] = { "x ", "X " };

	/** Hybrid marker placed between the left and right species when a hybrid name is assembled. */
	private static final String HYBRID_MARKER = " x ";

	/** Alternative hybrid markers; a name containing any of them is treated as a hybrid. */
	private static final String[] HYBRID_MARKER_ALT = { " x ", " X " };

	/** Pattern to split hybrid species to left and right part */
	private static final Pattern HYBRID_MARKER_REGEXP = Pattern.compile("\\s+[xX]\\s+");

	/** Genus name by genus id. */
	private Map<Long, String> genusIdLookup = new HashMap<>();

	/** Genus ids registered under each genus name. */
	private Map<String, List<Long>> genusIdSpecies = new HashMap<>();

	/** The species list lookup by genus name. */
	private Map<String, List<IGrinSpecies>> speciesLookup = new HashMap<>();

	/** Species row by species id. */
	private Map<Long, IGrinSpecies> speciesIdLookup = new HashMap<>();

	/** Number of species rows registered so far. */
	private int speciesRows;

	/**
	 * Register a genus. The name is trimmed before it is stored, and one name may be
	 * registered under several ids.
	 *
	 * @param genusId the genus id
	 * @param genus the genus name
	 */
	public void registerGenus(Long genusId, String genus) {
		final String name = genus.trim();

		// id -> name
		genusIdLookup.put(genusId, name);

		// name -> ids
		List<Long> idsOfGenus = genusIdSpecies.get(name);
		if (idsOfGenus == null) {
			idsOfGenus = new ArrayList<>();
			genusIdSpecies.put(name, idsOfGenus);
		}
		idsOfGenus.add(genusId);

		// make sure the genus has a (possibly still empty) species list
		if (speciesLookup.get(name) == null) {
			speciesLookup.put(name, new ArrayList<>(50));
		}
	}

	/**
	 * Add a species row to the database.
	 * <p>
	 * The row is validated completely before any lookup is modified, so a rejected row
	 * leaves the database unchanged.
	 *
	 * @param speciesRow the species row
	 * @throws TaxonomyException when the row's genus id is not registered or when a row with
	 *         the same species id has already been registered
	 */
	public void registerSpecies(IGrinSpecies speciesRow) throws TaxonomyException {
		List<IGrinSpecies> genusSpecies = speciesLookup.get(genusIdLookup.get(speciesRow.getGenusId()));
		if (genusSpecies == null) {
			throw new TaxonomyException("No genus with specified genusId");
		}

		// Check for duplicates BEFORE touching the genus list: otherwise a rejected
		// duplicate would stay in the genus' species list and show up in searches.
		if (speciesIdLookup.containsKey(speciesRow.getSpeciesId())) {
			throw new TaxonomyException("Duplicate speciesId not allowed");
		}

		genusSpecies.add(speciesRow);
		speciesIdLookup.put(speciesRow.getSpeciesId(), speciesRow);
		speciesRows++;
	}

	/**
	 * Short summary with the number of registered genus names and species rows.
	 *
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString() {
		final StringBuilder summary = new StringBuilder("InMemory Taxonomy Database: ");
		summary.append(genusIdSpecies.size()).append(" genera");
		summary.append(" and ").append(speciesRows).append(" species");
		return summary.toString();
	}

	/**
	 * Check whether a genus is registered under exactly this name.
	 * <p>
	 * This is a plain map key lookup and therefore case-sensitive: the argument must equal the
	 * registered genus name. A case-insensitive scan only exists as the commented-out
	 * alternative inside the method.
	 *
	 * @param genus the genus name
	 * @return true, if a genus with this exact name is registered
	 */
	@Override
	public boolean containsGenus(String genus) {
		return genusIdSpecies.containsKey(genus);
		// .keySet().stream().anyMatch(g -> g.equalsIgnoreCase(genus));
	}

	/**
	 * Find similar genera. Unlike {@link #findSimilarGenus(String, int)}, this method keeps every
	 * registered name scoring at least 0.5, without comparing candidates to the best score.
	 *
	 * @param genus base search name
	 * @param maxSize max matches to return
	 * @return genus names ordered by descending similarity score
	 */
	public List<String> findSimilarGenera(String genus, int maxSize) {
		// highest score first
		final Comparator<Suggestion<String>> bestFirst = Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder());

		// score every registered genus name and drop the poorly scored ones
		final Stream<Suggestion<String>> decentlyScored = genusIdSpecies.keySet().stream()
			.map(candidate -> new Suggestion<String>(candidate, similarityScore(genus, candidate)))
			.filter(scored -> scored.getScore() >= .5)
			.sequential();

		return decentlyScored
			.sorted(bestFirst)
			// debug print
			.peek(InMemoryTaxonomyDatabase::print)
			// back to distinct names, capped at maxSize
			.map(Suggestion::getSuggestion)
			.distinct()
			.limit(maxSize)
			.collect(Collectors.toList());
	}

	/**
	 * Suggest registered genus names that are similar to {@code genus}.
	 * <p>
	 * Every registered name is scored with {@link #similarityScore(String, String)} and names
	 * scoring below 0.5 are dropped. Of the remaining names only those close to the best score
	 * are kept: when the best score is 1.0 the cut-off is 0.95, otherwise it is
	 * {@code bestScore * NONMATCH_MARGIN}.
	 * <p>
	 * Note that a registered genus scores 1.0 against itself and is therefore part of the
	 * result; this method does not return an empty list for registered names.
	 *
	 * @param genus base search name
	 * @param maxSize max matches to return
	 * @return genus names ordered by descending similarity score
	 */
	@Override
	public List<String> findSimilarGenus(String genus, int maxSize) {

		// presumably tracks the highest score passed to update() -- BestScore is declared elsewhere
		BestScore bestScore = new BestScore();

		return genusIdSpecies.keySet().parallelStream()
				// convert each genus to Suggestion
				.map(candidate -> new Suggestion<String>(candidate, similarityScore(genus, candidate)))
				// keep decently scored suggestions
				.filter(scored -> scored.getScore() >= .5)
				// debug print
				// .peek(InMemoryTaxonomyDatabase::print)
				// convert to sequential stream
				// NOTE(review): per the java.util.stream documentation the execution mode applies to
				// the whole pipeline, so this call likely makes the parallelStream() above run
				// sequentially as well -- confirm before relying on parallel scoring
				.sequential()
				// debug print
				// .peek(InMemoryTaxonomyDatabase::print)
				// update best score; the sort below consumes all elements before releasing any,
				// so the best score is complete by the time the second filter runs
				.peek(scored -> bestScore.update(scored.getScore()))
				// sort by score, descending
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// debug print
				// .peek(InMemoryTaxonomyDatabase::print)
				// Prefer full match
				.filter(scored -> scored.getScore() >= (bestScore.getBestScore() == 1.0 ? 0.95 : (bestScore.getBestScore() * NONMATCH_MARGIN)))
				// debug print
				.peek(InMemoryTaxonomyDatabase::print)
				// convert to suggested Strings
				.map(Suggestion::getSuggestion)
				// keep distinct results
				.distinct()
				// limit to maxSize
				.limit(maxSize).collect(Collectors.toList());
	}

	/**
	 * Trace-log the score and the suggested value of a suggestion.
	 *
	 * @param <T> the type of the suggested value
	 * @param suggestion the suggestion to log
	 */
	public static <T> void print(Suggestion<T> suggestion) {
		final var score = suggestion.getScore();
		final var suggested = suggestion.getSuggestion();
		log.trace("Score={} suggestion={}", score, suggested);
	}

	/**
	 * Trace-log the class and the string form of an arbitrary object.
	 * <p>
	 * A {@code null} argument is logged as {@code NULL = null} instead of failing.
	 *
	 * @param suggestion the object to log, may be {@code null}
	 */
	public static void print(Object suggestion) {
		// Objects.toString is null-safe; calling suggestion.toString() directly threw a
		// NullPointerException for the very null case the class check guards against.
		log.trace("{} = {}", (suggestion == null ? "NULL" : suggestion.getClass()), Objects.toString(suggestion));
	}

	/**
	 * Get all species rows registered for one genus. The genus name is capitalized before the
	 * lookup.
	 *
	 * @param genus genus name
	 * @return list of species, never null; empty when the genus is not registered
	 */
	protected List<IGrinSpecies> getAllGenusSpecies(@NonNull String genus) {
		final String genusName = StringUtils.capitalize(genus);
		final List<IGrinSpecies> registered = speciesLookup.get(genusName);
		if (registered == null) {
			return List.of();
		}
		return registered;
	}

	/**
	 * Get all species rows registered for the specified genera.
	 * <p>
	 * {@code null} entries and genera that are not registered are skipped. Genus names are
	 * capitalized before the lookup.
	 *
	 * @param genera the list of genera
	 * @return the species rows of all listed genera, never null
	 */
	protected List<IGrinSpecies> getAllGenusSpecies(@NonNull List<String> genera) {
		if (genera.isEmpty()) {
			return List.of();
		}
		if (genera.size() == 1) {
			final String onlyGenus = genera.get(0);
			if (onlyGenus == null) {
				// skip a null entry, exactly as the multi-genus path below does
				return List.of();
			}
			return getAllGenusSpecies(onlyGenus);
		}

		return genera.stream()
			.filter(Objects::nonNull)
			// Let's be clever about capitalization
			.map(StringUtils::capitalize)
			// species list
			.map(speciesLookup::get)
			// filter!
			.filter(Objects::nonNull)
			// merge into a fresh list; reduce() must not be used with a mutable identity value
			.flatMap(List::stream)
			.collect(Collectors.toList());
	}

	/**
	 * Check whether the genus has a species with exactly this name.
	 * <p>
	 * A name containing a hybrid marker ({@code " x "}) is split and both of its first two parts
	 * must be registered. A name starting with the specific-hybrid prefix ({@code "x "}) matches
	 * rows flagged as specific hybrid whose name equals the remainder after the prefix.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @return true when the species is registered for the genus
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#containsSpecies(java.lang.String, java.lang.String)
	 */
	@Override
	public boolean containsSpecies(String genus, String species) {
		log.trace("Does database contain genus={} species={}", genus, species);

		if (!genusIdSpecies.containsKey(genus)) {
			return false;
		}

		if (isNameHybrid(species)) {
			// A negative limit keeps trailing empty parts, so a malformed name such as "foo x "
			// yields ["foo", ""] instead of a one-element array and split[1] is always present.
			String[] split = HYBRID_MARKER_REGEXP.split(species, -1);
			String speciesLeft = split[0];
			String speciesRight = split[1];
			if (log.isTraceEnabled()) {
				log.trace("Species {} is a hybrid of {} and {}", species, speciesLeft, speciesRight);
			}
			// Check left and right
			return containsSpecies(genus, speciesLeft) && containsSpecies(genus, speciesRight);
		}

		if (isNameSpecificHybrid(species)) {
			// compare without the "x " prefix and require the hybrid flag
			final String speciesName = species.substring(HYBRID_SPECIES_PREFIX.length());
			return getAllGenusSpecies(genus).stream()
				.anyMatch(speciesRow -> "Y".equals(speciesRow.getIsSpecificHybrid()) && StringUtils.equals(speciesName, speciesRow.getSpeciesName()));
		}

		return getAllGenusSpecies(genus).stream()
			.anyMatch(speciesRow -> StringUtils.equals(species, speciesRow.getSpeciesName()));
	}

	/**
	 * Check whether the genus has a species with exactly this name and, when an author is
	 * provided, exactly this species authority.
	 * <p>
	 * A name containing a hybrid marker ({@code " x "}) is split and both of its first two parts
	 * must be registered; the author is not considered for the parts. A name starting with the
	 * specific-hybrid prefix ({@code "x "}) matches rows flagged as specific hybrid whose name
	 * equals the remainder after the prefix, like the two-argument overload does.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param spAuthor the species authority; ignored when blank
	 * @return true when a matching species row is registered for the genus
	 */
	@Override
	public boolean containsSpecies(String genus, String species, String spAuthor) {
		log.trace("Does database contain genus={} species={} spAuthor={}", genus, species, spAuthor);

		if (!genusIdSpecies.containsKey(genus)) {
			return false;
		}

		if (isNameHybrid(species)) {
			// A negative limit keeps trailing empty parts, so a malformed name such as "foo x "
			// yields ["foo", ""] instead of a one-element array and split[1] is always present.
			String[] split = HYBRID_MARKER_REGEXP.split(species, -1);
			String speciesLeft = split[0];
			String speciesRight = split[1];
			if (log.isTraceEnabled()) {
				log.trace("Species {} is a hybrid of {} and {}", species, speciesLeft, speciesRight);
			}
			// Check left and right
			return containsSpecies(genus, speciesLeft) && containsSpecies(genus, speciesRight);
		}

		final boolean isSpecificHybrid = isNameSpecificHybrid(species);
		// Species rows are compared without the "x " prefix (see the two-argument overload);
		// comparing the prefixed name meant a specific hybrid could never match here.
		final String speciesName = isSpecificHybrid ? species.substring(HYBRID_SPECIES_PREFIX.length()) : species;
		final boolean checkAuthor = StringUtils.isNotBlank(spAuthor);

		return getAllGenusSpecies(genus).stream().anyMatch(speciesRow ->
			StringUtils.equals(speciesName, speciesRow.getSpeciesName())
			&& (!checkAuthor || StringUtils.equals(spAuthor, speciesRow.getSpeciesAuthority()))
			&& (!isSpecificHybrid || "Y".equals(speciesRow.getIsSpecificHybrid()))
		);
	}

	/**
	 * Suggest taxa similar to the provided names.
	 * <ul>
	 * <li>Genus {@code "Unknown"} never produces suggestions.</li>
	 * <li>With a blank species only the genus is suggested: the capitalized genus itself when it is
	 * registered, otherwise up to three similar genera.</li>
	 * <li>A hybrid name (containing {@code " x "}) is split; suggestions for its first two parts are
	 * cross-joined and returned as genus + hybrid-name taxa.</li>
	 * <li>Otherwise every species row of the genus (or of up to three similar genera when the genus
	 * is not registered) is scored on genus, species, authority and subtaxa. Rows scoring below
	 * 0.6 are dropped, the top {@code maxSize} rows are taken and of those only rows close to the
	 * best score are returned (at least 0.95 when the best score is 1.0, otherwise at least 90% of
	 * the best score).</li>
	 * </ul>
	 *
	 * @param genus the genus name
	 * @param species the species name, may be blank
	 * @param spAuthor the species authority; only scored when not blank
	 * @param subtaxa the subtaxa; a blank value is scored as "no subtaxa"
	 * @param subtAuthor the subtaxa authority; only scored when not blank
	 * @param maxSize max matches to return
	 * @return suggested taxa, best first
	 */
	@Override
	public List<Taxon> findSimilar(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
		log.info("Searching similar species for '{}' '{}' '{}' '{}' '{}'", genus, species, spAuthor, subtaxa, subtAuthor);
		if ("Unknown".equals(genus)) {
			// Don't make suggestions
			return List.of();
		}

		// Handle special case of no species name!
		if (StringUtils.isBlank(species)) {
			List<Long> genusId = genusIdSpecies.get(StringUtils.capitalize(genus));
			if (genusId != null) {
				return List.of(new Taxon(StringUtils.capitalize(genus), null, null, null, null));
			}
			return findSimilarGenus(genus, 3).stream().map(genusName -> new Taxon(genusName, null, null, null, null)).collect(Collectors.toList());
		}

		List<Long> genusId = genusIdSpecies.get(genus);
		List<String> genera = null;
		if (genusId == null) {
			genera = findSimilarGenera(genus, 3);
			log.info("Using: {}", genera);
		} else {
			genera = List.of(genus);
		}

		final boolean isHybrid = isNameHybrid(species);

		if (isHybrid) {
			// A negative limit keeps trailing empty parts, so a malformed name such as "foo x "
			// yields ["foo", ""] instead of a one-element array and split[1] is always present.
			String[] split = HYBRID_MARKER_REGEXP.split(species, -1);
			String speciesLeft = split[0];
			String speciesRight = split[1];
			if (log.isTraceEnabled()) {
				log.trace("Species {} is a hybrid of {} and {}", species, speciesLeft, speciesRight);
			}

			List<Suggestion<String>> lefts = makeSuggestions(List.of(genus), speciesLeft, null).limit(maxSize)
					.map(suggestion -> new Suggestion<>(suggestion.getSuggestion().getSpeciesName(), suggestion.getScore())).collect(Collectors.toList());

			List<Suggestion<String>> rights = makeSuggestions(List.of(genus), speciesRight, null).limit(maxSize)
					.map(suggestion -> new Suggestion<>(suggestion.getSuggestion().getSpeciesName(), suggestion.getScore())).collect(Collectors.toList());

			// Inject the provided species name when the best counterpart is not a 100% match
			if (lefts.size() == 0 && rights.size() > 0 && rights.get(0).getScore() < 1) {
				lefts.add(new Suggestion<String>(speciesLeft, 0.1));
			} else if (rights.size() == 0 && lefts.size() > 0 && lefts.get(0).getScore() < 1) {
				rights.add(new Suggestion<String>(speciesRight, 0.1));
			}

			log.debug("Left for {} is {}", speciesLeft, lefts);
			log.debug("Right for {} is {}", speciesRight, rights);

			return crossJoinSpecies(lefts, rights)
				// stream results
				.stream()
				// order by score
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// convert sorted Suggestion list back to Strings
				.map(Suggestion::getSuggestion)
				// keep distinct elements
				.distinct()
				// limit to maxSize
				.limit(maxSize)
				.map(name -> new Taxon(StringUtils.capitalize(genus), name, null, null, null))
				// convert to List<Taxon>
				.collect(Collectors.toList());
		}

		// Drop a genus name repeated at the start of the species name
		if (StringUtils.startsWithIgnoreCase(species, genus + " ")) {
			log.debug("Removed {} from {} -> {}", genus, species, species.substring(genus.length()).trim());
			species = species.substring(genus.length()).trim();
		}
		final boolean isSpecificHybrid = isNameSpecificHybrid(species);
		String speciesName = isSpecificHybrid ? species.substring(HYBRID_SPECIES_PREFIX.length()).trim() : species;

		// Weighted average of the individual similarity scores; whatMatters is the sum of the
		// weights of the components that took part.
		Function<IGrinSpecies, Double> scorer = (IGrinSpecies speciesRow) -> {
			double score = 0.0;
			score += 2 * similarityScore(genus, genusIdLookup.get(speciesRow.getGenusId()));
			int whatMatters = 2;
			if (isSpecificHybrid) {
				score += "Y".equals(speciesRow.getIsSpecificHybrid()) ? 1 : 0;
				score += 4 * similarityScore(speciesName, speciesRow.getSpeciesName());
				whatMatters += 5;
			} else {
				score += 4 * similarityScore(speciesName, speciesRow.getSpeciesName());
				whatMatters += 4;
			}
			if (StringUtils.isNotBlank(spAuthor)) {
				score += similarityScore(spAuthor, speciesRow.getSpeciesAuthority());
				whatMatters++;
			}
			{
				// Good match on subtaxa is important!
				score += 10 * similarityScore(StringUtils.defaultIfBlank(subtaxa, null), speciesRow.getSubtaxa());
				whatMatters += 10;
			}
			if (StringUtils.isNotBlank(subtAuthor)) {
				score += similarityScore(subtAuthor, speciesRow.getSubtaxaAuthority());
				whatMatters++;
			}
			// {
			// 	// if record is current, give it a boost
			// 	score += speciesRow.isCurrent() ? 0.5 : 0;
			// 	whatMatters++;
			// }
			return score/whatMatters;
		};

		final BestScore bestScore = new BestScore();
		return getAllGenusSpecies(genera).parallelStream()
			// convert each candidate to a Suggestion
			.map(speciesRow -> new Suggestion<>(speciesRow, scorer.apply(speciesRow)))
			// keep suggestions with score over .6
			.filter(scored -> scored.getScore() >= .6)
			// print
			.peek(score -> log.debug("Score1 {}: {}", score.getScore(), score.getSuggestion()))
			// all together now
			.sequential()
			// generate bestScore -- sequential; the sort below consumes every element before
			// releasing any, so the best score is complete when the last filter runs
			.peek(scored -> bestScore.update(scored.getScore()))
			// sort suggestions by score, descending
			.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
			// Limit
			.limit(maxSize)
			// keep suggestions: when bestScore is 1.0 keep suggestions over 95% otherwise keep suggestions within 90%
			.filter(scored -> scored.getScore() >= (bestScore.getBestScore() == 1.0 ? 0.95 : (bestScore.getBestScore() * 0.9)))
			// print
			.peek(score -> log.debug("Score {}: {}", score.getScore(), score.getSuggestion()))
			// results
			.map(suggestion -> new Taxon(suggestion.getSuggestion(), speciesIdLookup::get, genusIdLookup::get))
			.collect(Collectors.toList());
	}

	/**
	 * Suggest species names of a registered genus that are similar to {@code species}.
	 * <p>
	 * A hybrid name (containing {@code " x "}) is split; suggestions for its first two parts are
	 * cross-joined into hybrid names whose score is the product of both part scores.
	 *
	 * @param genus the genus name; must be registered unless it is {@code "Unknown"}
	 * @param species the species name
	 * @param spAuthor the species authority, may be {@code null}
	 * @param maxSize max matches to return
	 * @return distinct suggested species names, best first; empty for genus {@code "Unknown"}
	 * @throws UnsupportedOperationException when the genus is not registered
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#findSimilarSpecies(java.lang.String, java.lang.String, int)
	 */
	@Override
	public List<String> findSimilarSpecies(String genus, String species, String spAuthor, int maxSize) {
		log.debug("Searching similar species for genus={} species={}", genus, species);
		if ("Unknown".equals(genus)) {
			// Don't make suggestions
			return List.of();
		}
		List<Long> genusId = genusIdSpecies.get(genus);
		if (genusId == null) {
			throw new UnsupportedOperationException("Genus does not exist in database. Genus=" + genus);
		}

		final boolean isHybrid = isNameHybrid(species);

		if (isHybrid) {
			// A negative limit keeps trailing empty parts, so a malformed name such as "foo x "
			// yields ["foo", ""] instead of a one-element array and split[1] is always present.
			String[] split = HYBRID_MARKER_REGEXP.split(species, -1);
			String speciesLeft = split[0];
			String speciesRight = split[1];
			if (log.isTraceEnabled()) {
				log.trace("Species {} is a hybrid of {} and {}", species, speciesLeft, speciesRight);
			}

			List<Suggestion<String>> lefts = makeSuggestions(List.of(genus), speciesLeft, spAuthor).limit(maxSize)
					.map(suggestion -> new Suggestion<>(suggestion.getSuggestion().getSpeciesName(), suggestion.getScore())).collect(Collectors.toList());

			List<Suggestion<String>> rights = makeSuggestions(List.of(genus), speciesRight, spAuthor).limit(maxSize)
					.map(suggestion -> new Suggestion<>(suggestion.getSuggestion().getSpeciesName(), suggestion.getScore())).collect(Collectors.toList());

			// Inject the provided species name when the best counterpart is not a 100% match
			if (lefts.size() == 0 && rights.size() > 0 && rights.get(0).getScore() < 1) {
				lefts.add(new Suggestion<String>(speciesLeft, 0.1));
			} else if (rights.size() == 0 && lefts.size() > 0 && lefts.get(0).getScore() < 1) {
				rights.add(new Suggestion<String>(speciesRight, 0.1));
			}

			if (log.isTraceEnabled()) {
				log.trace("Left for {} is {}", speciesLeft, lefts);
				log.trace("Right for {} is {}", speciesRight, rights);
			}

			return crossJoinSpecies(lefts, rights)
					// stream results
					.stream()
					// order by score
					.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
					// convert sorted Suggestion list back to Strings
					.map(Suggestion::getSuggestion)
					// keep distinct elements
					.distinct()
					// limit to maxSize
					.limit(maxSize)
					// convert to List<String>
					.collect(Collectors.toList());
		}

		return makeSuggestions(List.of(genus), species, spAuthor)
				// convert sorted Suggestion list back to species rows
				.map(Suggestion::getSuggestion)
				// keep species name
				.map(IGrinSpecies::getSpeciesName)
				// keep distinct elements
				.distinct()
				// limit to maxSize
				.limit(maxSize)
				// convert to List<String>
				.collect(Collectors.toList());
	}

	/**
	 * Pair every left suggestion with every right suggestion. The joined name is
	 * {@code left + " x " + right} and the joined score is the product of both scores.
	 *
	 * @param lefts left suggestions
	 * @param rights right suggestions
	 * @return List of cross-joined suggestions from both lists
	 */
	private List<Suggestion<String>> crossJoinSpecies(List<Suggestion<String>> lefts, List<Suggestion<String>> rights) {
		final List<Suggestion<String>> joined = new ArrayList<>();
		lefts.forEach(left -> rights.forEach(right -> {
			final String hybridName = left.getSuggestion().concat(HYBRID_MARKER).concat(right.getSuggestion());
			final Suggestion<String> pair = new Suggestion<>(hybridName, left.getScore() * right.getScore());
			if (log.isTraceEnabled()) {
				log.trace("Cross-join '{}' with '{}' result={}", left, right, pair);
			}
			joined.add(pair);
		}));
		return joined;
	}

	/**
	 * Return a lazily evaluated stream of scored species rows for the genera and species name.
	 * <p>
	 * A genus name repeated at the start of {@code species} is removed first. Each row is scored
	 * on species name (weight 3) and, when {@code spAuthor} is not null, on species authority
	 * (weight 1). Rows scoring below 0.5 are dropped, non-current rows that are not a 100% match
	 * lose 20% of their score, and only rows close to the best score are kept (at least 0.95 when
	 * the best score is 1.0, otherwise at least 80% of the best score).
	 * 
	 * @param genera genus names whose species rows are considered
	 * @param species species name to match
	 * @param spAuthor species authority to match, or {@code null} to ignore the authority
	 * @return stream of best Suggestions, ordered by descending score
	 */
	private Stream<Suggestion<IGrinSpecies>> makeSuggestions(final List<String> genera, String species, final String spAuthor) {
		// presumably tracks the highest score passed to update() -- BestScore is declared elsewhere
		BestScore bestScore = new BestScore();

		for (var genus : genera) {
			if (StringUtils.startsWithIgnoreCase(species, genus + " ")) {
				log.debug("Removed {} from {} -> {}", genus, species, species.substring(genus.length()).trim());
				species = species.substring(genus.length()).trim();
			}
		}
		// effectively final copy for use in the lambda below
		String speciesName = species;
		log.info("Making suggestions for {} {} {}", genera, species, spAuthor);

		// weighted average; whatMatters is the sum of the weights that took part
		Function<IGrinSpecies, Double> scorer = (IGrinSpecies speciesRow) -> {
			double score = 0.0;
			score += 3 * similarityScore(speciesName, speciesRow.getSpeciesName());
			int whatMatters = 3;
			if (spAuthor != null) {
				score += similarityScore(spAuthor, speciesRow.getSpeciesAuthority());
				whatMatters++;
			}
			return score / whatMatters;
		};

		return getAllGenusSpecies(genera).stream()
				// // print
				// .peek(InMemoryTaxonomyDatabase::print)
				// convert each candidate to a Suggestion
				.map(speciesRow -> new Suggestion<>(speciesRow, scorer.apply(speciesRow)))
				// .peek(InMemoryTaxonomyDatabase::print)
				// keep suggestions with score over .5
				.filter(scored -> scored.getScore() >= .5)
				// if record is not current, reduce score by 20%
				.peek(scored -> {
					// But only if not a 100% match
					if (scored.getScore() < 1.0) {
						scored.setScore((scored.getSuggestion().isCurrent() ? 1.0 : 0.8) * scored.getScore());
					}
				})
				// sort suggestions by score, descending
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// generate bestScore; elements arrive here in descending order, so the first
				// element to pass already carries the highest score
				.peek(scored -> bestScore.update(scored.getScore()))
				// keep suggestions: when bestScore is 1.0 keep suggestions over 0.95 otherwise keep suggestions within
				// 80%
				.filter(scored -> scored.getScore() >= (bestScore.getBestScore() == 1.0 ? 0.95 : (bestScore.getBestScore() * 0.8)));
	}

	/**
	 * Score the similarity of two strings; 1.0 is a full match.
	 * <p>
	 * Two blank values match fully, a blank and a non-blank value do not match at all, and
	 * identical strings match fully. Any other pair is compared case-insensitively as the mean of
	 * the Dice and Levenshtein coefficients, scaled by 0.9 so that a non-identical pair does not
	 * score like an exact match (presumably both coefficients are within [0, 1] -- see
	 * {@code StringSimilarity}).
	 *
	 * @param original the original
	 * @param candidate the candidate
	 * @return the score between 0 and 1.0 where 0 is no similarity and 1.0 is full match
	 */
	@Override
	public double similarityScore(final String original, final String candidate) {
		final boolean noOriginal = StringUtils.isBlank(original);
		final boolean noCandidate = StringUtils.isBlank(candidate);
		if (noOriginal && noCandidate) return 1.0;
		if (noOriginal || noCandidate) return 0.0;
		if (StringUtils.equals(original, candidate)) return 1.0;

		// Locale.ROOT keeps the case folding independent of the JVM default locale
		// (e.g. under a Turkish locale "I" would otherwise lower-case to a dotless "ı").
		final String originalLower = original.toLowerCase(Locale.ROOT);
		final String candidateLower = candidate.toLowerCase(Locale.ROOT);

		double score = (StringSimilarity.diceCoefficientOptimized(originalLower, candidateLower)
				+ StringSimilarity.getLevenshteinCoefficient(originalLower, candidateLower)) / 2.0f;
		return score * 0.9;
	}

	/**
	 * List the species authorities registered for a species, without preferring any author.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @return the result of the three-argument overload called with a {@code null} author
	 */
	@Override
	public List<String> getSpeciesAuthority(String genus, String species) {
		final String noPreferredAuthor = null;
		return getSpeciesAuthority(genus, species, noPreferredAuthor);
	}

	/**
	 * List the distinct, non-null species authorities registered for a species.
	 * <p>
	 * A name starting with the specific-hybrid prefix ({@code "x "}) matches rows flagged as
	 * specific hybrid whose name equals the remainder after the prefix. When {@code spAuthor} is
	 * one of the registered authorities, only that author is returned.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param spAuthor the preferred species authority, may be {@code null}
	 * @return the authorities, or {@code null} when the genus is not registered
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#getSpeciesAuthority(java.lang.String,
	 * java.lang.String)
	 */
	@Override
	public List<String> getSpeciesAuthority(String genus, String species, String spAuthor) {
		if (genusIdSpecies.get(genus) == null) {
			return null;
		}

		final boolean isSpecificHybrid = isNameSpecificHybrid(species);
		// Species rows are compared without the "x " prefix, as containsSpecies(genus, species)
		// does; comparing the prefixed name meant a specific hybrid could never match here.
		final String speciesName = isSpecificHybrid ? species.substring(HYBRID_SPECIES_PREFIX.length()) : species;

		List<String> candidates = getAllGenusSpecies(genus).stream()
				// keep rows with matching species
				.filter(speciesRow -> StringUtils.equals(speciesName, speciesRow.getSpeciesName())
					&& (!isSpecificHybrid || "Y".equals(speciesRow.getIsSpecificHybrid())))
				// debug print
				.peek(speciesRow -> log.trace("Species authority {}", speciesRow.getSpeciesAuthority()))
				// to authority, dropping rows without one
				.map(IGrinSpecies::getSpeciesAuthority)
				.filter(Objects::nonNull)
				.distinct()
				.collect(Collectors.toList());

		// the provided author wins when it is among the registered ones
		if (spAuthor != null && candidates.contains(spAuthor)) {
			return List.of(spAuthor);
		}
		return candidates;
	}

	/**
	 * Check if the name starts with "x " or "X ".
	 * 
	 * @param name the name to check
	 * @return true if name denotes a specific hybrid
	 */
	private boolean isNameSpecificHybrid(String name) {
		return Stream.of(HYBRID_SPECIES_PREFIXES).anyMatch(prefix -> StringUtils.startsWith(name, prefix));
	}

	/**
	 * Check if name contains " x " or " X ".
	 * 
	 * @param name the name to check
	 * @return true when name denotes a hybrid
	 */
	private boolean isNameHybrid(String name) {
		return Stream.of(HYBRID_MARKER_ALT).anyMatch(marker -> StringUtils.contains(name, marker));
	}

	/**
	 * Check whether the species has the subtaxa, ignoring all authorities.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param subtaxa the subtaxa
	 * @return the result of the five-argument overload called with {@code null} authors
	 */
	@Override
	public boolean containsSubtaxa(String genus, String species, String subtaxa) {
		final String noSpAuthor = null;
		final String noSubtAuthor = null;
		return containsSubtaxa(genus, species, noSpAuthor, subtaxa, noSubtAuthor);
	}

	/**
	 * Check whether the genus has a row with exactly this species name and subtaxa and, when
	 * {@code subtAuthor} is not blank, exactly this subtaxa authority. {@code spAuthor} is not
	 * used for matching.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param spAuthor the species authority (not evaluated)
	 * @param subtaxa the subtaxa
	 * @param subtAuthor the subtaxa authority; ignored when blank
	 * @return true when a matching row is registered
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#containsSubtaxa(java.lang.String, java.lang.String, java.lang.String)
	 */
	@Override
	public boolean containsSubtaxa(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
		log.trace("Does database contain genus={} species={}", genus, species);

		if (!genusIdSpecies.containsKey(genus)) {
			return false;
		}

		final boolean checkSubtAuthor = StringUtils.isNotBlank(subtAuthor);
		for (IGrinSpecies row : getAllGenusSpecies(genus)) {
			if (!StringUtils.equals(species, row.getSpeciesName())) {
				continue;
			}
			if (!StringUtils.equals(subtaxa, row.getSubtaxa())) {
				continue;
			}
			if (checkSubtAuthor && !StringUtils.equals(subtAuthor, row.getSubtaxaAuthority())) {
				continue;
			}
			return true;
		}
		return false;
	}

	/**
	 * Suggest subtaxa similar to {@code subtaxa}, ignoring all authorities.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param subtaxa the subtaxa to match
	 * @param maxSize max matches to return
	 * @return the result of the six-argument overload called with {@code null} authors
	 */
	@Override
	public List<String> findSimilarSubtaxa(String genus, String species, String subtaxa, int maxSize) {
		final String noSpAuthor = null;
		final String noSubtAuthor = null;
		return findSimilarSubtaxa(genus, species, noSpAuthor, subtaxa, noSubtAuthor, maxSize);
	}

	/**
	 * Suggest subtaxa of a species that are similar to {@code subtaxa}.
	 * <p>
	 * Only rows of the genus with exactly this species name and a non-blank subtaxa are
	 * considered. Rows scoring below 0.5 are dropped, non-current rows lose 20% of their score,
	 * and only rows close to the best score are kept (at least 0.95 when the best score is 1.0,
	 * otherwise at least 80% of the best score). The authors are not used for matching.
	 *
	 * @param genus the genus name; must be registered
	 * @param species the species name, may be {@code null} (matches rows without species name)
	 * @param spAuthor the species authority (not evaluated)
	 * @param subtaxa the subtaxa to match
	 * @param subtAuthor the subtaxa authority (not evaluated)
	 * @param maxSize max matches to return
	 * @return distinct suggested subtaxa, best first; empty when {@code subtaxa} is blank
	 * @throws UnsupportedOperationException when the genus is not registered
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#findSimilarSubtaxa(java.lang.String, java.lang.String, java.lang.String, int)
	 */
	@Override
	public List<String> findSimilarSubtaxa(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
		log.debug("Searching similar subtaxa for genus={} species={} subtaxa={}", genus, species, subtaxa);

		if (StringUtils.isBlank(subtaxa)) {
			return List.of();
		}

		List<Long> genusId = genusIdSpecies.get(genus);
		if (genusId == null) {
			throw new UnsupportedOperationException("Genus does not exist in database. Genus=" + genus);
		}

		BestScore bestScore = new BestScore();

		return getAllGenusSpecies(genus).stream()
				// null-safe species comparison, like the other subtaxa lookups
				// (species.equals(...) threw a NullPointerException for a null species)
				.filter(speciesRow -> StringUtils.equals(species, speciesRow.getSpeciesName()))
				// keep candidates that are not blank
				.filter(speciesRow -> StringUtils.isNotBlank(speciesRow.getSubtaxa()))
				// convert to Suggestions
				.map(speciesRow -> new Suggestion<>(speciesRow, similarityScore(subtaxa, speciesRow.getSubtaxa())))
				// keep suggestions with decent score
				.filter(scored -> scored.getScore() >= .5)
				// if record is not current, reduce score by 20%
				.peek(scored -> scored.setScore((scored.getSuggestion().isCurrent() ? 1.0 : 0.8) * scored.getScore()))
				// sort by score descending
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// update best score; elements arrive here in descending order, so the first
				// element to pass already carries the highest score
				.peek(scored -> bestScore.update(scored.getScore()))
				// keep only nice Suggestions
				.filter(scored -> scored.getScore() >= (bestScore.getBestScore() == 1.0 ? 0.95 : (bestScore.getBestScore() * 0.8)))
				// back to species rows
				.map(Suggestion::getSuggestion)
				// keep subtaxa
				.map(IGrinSpecies::getSubtaxa)
				// keep distinct
				.distinct()
				// limit results to maxSize
				.limit(maxSize).collect(Collectors.toList());
	}

	/**
	 * List the subtaxa authorities registered for a subtaxa, ignoring all provided authors.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param subtaxa the subtaxa
	 * @return the result of the five-argument overload called with {@code null} authors
	 */
	@Override
	public List<String> getSubtaxaAuthority(String genus, String species, String subtaxa) {
		final String noSpAuthor = null;
		final String noSubtAuthor = null;
		return getSubtaxaAuthority(genus, species, noSpAuthor, subtaxa, noSubtAuthor);
	}

	/**
	 * List the subtaxa authority of every row of the genus with exactly this species name and
	 * subtaxa. The list has one entry per matching row, so it may contain duplicates and
	 * {@code null} entries. The provided authors are not used for matching.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param spAuthor the species authority (not evaluated)
	 * @param subtaxa the subtaxa
	 * @param subtAuthor the subtaxa authority (not evaluated)
	 * @return the authorities, or {@code null} when the genus is not registered
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#getSubtaxaAuthority(java.lang.String, java.lang.String, java.lang.String)
	 */
	@Override
	public List<String> getSubtaxaAuthority(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
		if (genusIdSpecies.get(genus) == null) {
			return null;
		}

		final List<String> authorities = new ArrayList<>();
		for (IGrinSpecies row : getAllGenusSpecies(genus)) {
			final boolean sameSpecies = StringUtils.equals(species, row.getSpeciesName());
			if (sameSpecies && StringUtils.equals(subtaxa, row.getSubtaxa())) {
				// debug print
				log.trace("Subtaxa authority {}", row.getSubtaxaAuthority());
				// authority or null
				authorities.add(row.getSubtaxaAuthority());
			}
		}
		return authorities;
	}

	/**
	 * List up to {@code maxSize} rows of the genus whose species name equals {@code species}.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param maxSize max rows to return
	 * @return matching species rows in registration order
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#listSpecies(java.lang.String, java.lang.String, int)
	 */
	@Override
	public List<IGrinSpecies> listSpecies(String genus, String species, int maxSize) {
		final Stream<IGrinSpecies> rowsOfGenus = getAllGenusSpecies(genus).stream();
		final Stream<IGrinSpecies> sameSpecies = rowsOfGenus.filter(row -> StringUtils.equals(species, row.getSpeciesName()));
		return sameSpecies.limit(maxSize).collect(Collectors.toList());
	}

	/**
	 * Find species rows by genus, species and subtaxa, ignoring all authorities.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param subtaxa the subtaxa, or {@code null} to accept any subtaxa
	 * @return the result of {@code findSpeciesRows} called with {@code null} authors
	 */
	@Override
	public List<IGrinSpecies> findSpeciesRow(String genus, String species, String subtaxa) {
		final String anySpAuthor = null;
		final String anySubtAuthor = null;
		return findSpeciesRows(genus, species, anySpAuthor, subtaxa, anySubtAuthor);
	}

	/**
	 * Find the species rows of a genus that match the provided names exactly.
	 * <p>
	 * Species {@code "sp."} is looked up as {@code "spp."}. A name starting with the
	 * specific-hybrid prefix ({@code "x "}) matches rows flagged as specific hybrid whose name
	 * equals the trimmed remainder after the prefix. For {@code spAuthor}, {@code subtaxa} and
	 * {@code subtAuthor}, {@code null} means "do not filter" while a blank value only matches rows
	 * where the value is {@code null}; subtaxa additionally match when the row's
	 * {@code subtaxaMatches} accepts them.
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param spAuthor the species authority
	 * @param subtaxa the subtaxa
	 * @param subtAuthor the subtaxa authority
	 * @return matching species rows in registration order
	 */
	@Override
	public List<IGrinSpecies> findSpeciesRows(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
		// Use GRIN naming
		final String speciesName = StringUtils.equals("sp.", species) ? "spp." : species;

		final boolean isSpecificHybrid = isNameSpecificHybrid(speciesName);
		final String nameToMatch = isSpecificHybrid ? speciesName.substring(HYBRID_SPECIES_PREFIX.length()).trim() : speciesName;

		// blank filter values are compared as null
		final String wantedSpAuthor = StringUtils.defaultIfBlank(spAuthor, null);
		final String wantedSubtaxa = StringUtils.defaultIfBlank(subtaxa, null);
		final String wantedSubtAuthor = StringUtils.defaultIfBlank(subtAuthor, null);

		final List<IGrinSpecies> matches = new ArrayList<>();
		for (IGrinSpecies row : getAllGenusSpecies(genus)) {
			// species authority
			if (spAuthor != null && !StringUtils.equals(wantedSpAuthor, row.getSpeciesAuthority())) {
				continue;
			}
			// species name (and hybrid flag)
			if (isSpecificHybrid && !"Y".equals(row.getIsSpecificHybrid())) {
				continue;
			}
			if (!StringUtils.equals(nameToMatch, row.getSpeciesName())) {
				continue;
			}
			// subtaxa
			if (subtaxa != null && !StringUtils.equals(wantedSubtaxa, row.getSubtaxa()) && !row.subtaxaMatches(subtaxa)) {
				continue;
			}
			// subtaxa authority
			if (subtAuthor != null && !StringUtils.equals(wantedSubtAuthor, row.getSubtaxaAuthority())) {
				continue;
			}
			matches.add(row);
		}
		return matches;
	}

	/**
	 * Get the single taxon matching the provided names. Blank authors are treated as "any
	 * author" and a blank subtaxa as "no subtaxa".
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param spAuthor the species authority
	 * @param subtaxa the subtaxa
	 * @param subtAuthor the subtaxa authority
	 * @return the taxon, or {@code null} unless exactly one species row matches
	 */
	@Override
	public Taxon getTaxon(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
		final String wantedSpAuthor = StringUtils.defaultIfBlank(spAuthor, null);
		final String wantedSubtaxa = StringUtils.defaultIfBlank(subtaxa, "");
		final String wantedSubtAuthor = StringUtils.defaultIfBlank(subtAuthor, null);
		final List<IGrinSpecies> results = findSpeciesRows(genus, species, wantedSpAuthor, wantedSubtaxa, wantedSubtAuthor);

		if (results.size() != 1) {
			// none or ambiguous: report what was found and give up
			log.info("Got {} results for {} {} {} {} {}", results.size(), genus, species, spAuthor, subtaxa, subtAuthor);
			for (IGrinSpecies row : results) {
				log.info("> {}", row);
			}
			return null;
		}
		return new Taxon(results.get(0), speciesIdLookup::get, genusIdLookup::get);
	}

	/**
	 * Find up to {@code maxSize} taxa matching the provided names. Blank authors are treated as
	 * "any author" and a blank subtaxa as "no subtaxa".
	 *
	 * @param genus the genus name
	 * @param species the species name
	 * @param spAuthor the species authority
	 * @param subtaxa the subtaxa
	 * @param subtAuthor the subtaxa authority
	 * @param maxSize max taxa to return
	 * @return the matching taxa
	 */
	@Override
	public List<Taxon> findTaxa(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
		final String wantedSpAuthor = StringUtils.defaultIfBlank(spAuthor, null);
		final String wantedSubtaxa = StringUtils.defaultIfBlank(subtaxa, "");
		final String wantedSubtAuthor = StringUtils.defaultIfBlank(subtAuthor, null);
		final List<IGrinSpecies> rows = findSpeciesRows(genus, species, wantedSpAuthor, wantedSubtaxa, wantedSubtAuthor);

		return rows.stream()
			.limit(maxSize)
			.map(row -> new Taxon(row, speciesIdLookup::get, genusIdLookup::get))
			.collect(Collectors.toList());
	}

	/**
	 * Look up a registered species row by its id.
	 *
	 * @param speciesId the species id
	 * @return the species row, or {@code null} when no row is registered under that id
	 */
	@Override
	public IGrinSpecies getSpeciesRow(long speciesId) {
		final Long key = Long.valueOf(speciesId);
		return speciesIdLookup.get(key);
	}

	/**
	 * Look up a registered genus name by its id.
	 *
	 * @param genusId the genus id
	 * @return the genus name, or {@code null} when no genus is registered under that id
	 */
	@Override
	public String getGenus(long genusId) {
		final Long key = Long.valueOf(genusId);
		return genusIdLookup.get(key);
	}
}
