/*
 * Copyright 2016 Global Crop Diversity Trust
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.genesys.taxonomy.checker;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.genesys.taxonomy.gringlobal.model.IGrinSpecies;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link List} based in-memory "database" of genera and species rows. NOT THREAD-SAFE!
 * <p>
 * Genera are added with {@link #registerGenus(Long, String)} and species rows with
 * {@link #registerSpecies(IGrinSpecies)}. Name lookups are exact and case-sensitive; the {@code findSimilar*} methods
 * rank candidates using {@link #similarityScore(String, String)}.
 */
public class InMemoryTaxonomyDatabase implements TaxonomyDatabase {

	/** Percentage margin from best score. Matches with scores below bestScore * margin are ignored. */
	public static final double NONMATCH_MARGIN = 0.8;

	/** Candidates with a similarity score below this value are never suggested. */
	private static final double MIN_SIMILARITY = 0.5;

	/** When the best score is a full match (1.0), only candidates with at least this score are kept. */
	private static final double FULL_MATCH_MARGIN = 0.95;

	/** Multiplier applied to the score of species rows that are not current. */
	private static final double NOT_CURRENT_PENALTY = 0.8;

	/** The Constant LOG. */
	private static final Logger LOG = LoggerFactory.getLogger(InMemoryTaxonomyDatabase.class);

	/** Prefix of a species name that denotes a specific hybrid */
	private static final String HYBRID_SPECIES_PREFIX = "x ";

	/** Hybrid marker */
	private static final String HYBRID_MARKER = " x ";

	/** Alternative hybrid markers */
	private static final String[] HYBRID_MARKER_ALT = { " x ", " X " };

	/** Pattern to split hybrid species to left and right part */
	private static final Pattern HYBRID_MARKER_REGEXP = Pattern.compile("\\s+[xX]\\s+");

	/** Genus id to genus name. */
	private final Map<Long, String> genusIdLookup = new HashMap<>();

	/** Genus name to the ids of all genera registered under that name. */
	private final Map<String, List<Long>> genusIdSpecies = new HashMap<>();

	/** Genus id to the species rows registered for that genus. */
	private final Map<Long, List<IGrinSpecies>> speciesLookup = new HashMap<>();

	/** Species id to species row. */
	private final Map<Long, IGrinSpecies> speciesIdLookup = new HashMap<>();

	/** Number of successfully registered species rows. */
	private int speciesRows;

	/**
	 * Add a genus to the database. Registering a genusId that is already known keeps the species rows previously
	 * registered for it and does not record the id twice under the same name.
	 *
	 * @param genusId the genus id
	 * @param genus the genus
	 */
	public void registerGenus(Long genusId, String genus) {
		genusIdLookup.put(genusId, genus);

		final List<Long> idsForName = genusIdSpecies.computeIfAbsent(genus, name -> new ArrayList<>(1));
		if (!idsForName.contains(genusId)) {
			// A duplicate id here would make getAllGenusSpecies return every row twice
			idsForName.add(genusId);
		}

		// Do not replace an existing list: that would silently drop registered species
		speciesLookup.putIfAbsent(genusId, new ArrayList<>(1));
	}

	/**
	 * Add species to the database. The database is left unchanged when the row is rejected.
	 *
	 * @param speciesRow the species row
	 * @throws TaxonomyException when the genus of the row is not registered or the speciesId is already registered
	 */
	public void registerSpecies(IGrinSpecies speciesRow) throws TaxonomyException {
		final List<IGrinSpecies> genusSpecies = speciesLookup.get(speciesRow.getGenusId());
		if (genusSpecies == null) {
			throw new TaxonomyException("No genus with specified genusId");
		}
		// Validate before mutating anything so that a rejected row leaves no trace
		if (speciesIdLookup.containsKey(speciesRow.getSpeciesId())) {
			throw new TaxonomyException("Duplicate speciesId not allowed");
		}

		genusSpecies.add(speciesRow);
		speciesIdLookup.put(speciesRow.getSpeciesId(), speciesRow);
		speciesRows++;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString() {
		return "InMemory Taxonomy Database: " + genusIdSpecies.size() + " genera" + " and " + speciesRows + " species";
	}

	/**
	 * Exact, case-sensitive lookup of a genus name.
	 *
	 * @param genus the genus
	 * @return true, if a genus with exactly this name is registered
	 */
	@Override
	public boolean containsGenus(String genus) {
		return genusIdSpecies.containsKey(genus);
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#findSimilarGenus(java.lang.String, int)
	 */
	@Override
	public List<String> findSimilarGenus(String genus, int maxSize) {
		if (containsGenus(genus)) {
			LOG.trace("Database contains genus={}", genus);
			return Collections.emptyList();
		}

		final BestScore bestScore = new BestScore();

		// The pipeline must run sequentially: bestScore is updated as a side effect
		return genusIdSpecies.keySet().stream()
				// convert each genus to Suggestion
				.map(candidate -> new Suggestion<String>(candidate, similarityScore(genus, candidate)))
				// keep decently scored suggestions
				.filter(scored -> scored.getScore() >= MIN_SIMILARITY)
				// update best score; sorted() below consumes all elements before any reaches the next filter
				.peek(scored -> bestScore.update(scored.getScore()))
				// sort by score, descending
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// Prefer full match
				.filter(scored -> isCloseToBest(scored.getScore(), bestScore.getBestScore()))
				// debug print
				.peek(InMemoryTaxonomyDatabase::print)
				// convert to suggested Strings
				.map(Suggestion::getSuggestion)
				// keep distinct results
				.distinct()
				// limit to maxSize
				.limit(maxSize).collect(Collectors.toList());
	}

	/**
	 * Trace-log the score and value of a suggestion.
	 *
	 * @param <T> the generic type
	 * @param suggestion the suggestion
	 */
	public static <T> void print(Suggestion<T> suggestion) {
		if (LOG.isTraceEnabled()) {
			LOG.trace("Score={} suggestion={}", suggestion.getScore(), suggestion.getSuggestion());
		}
	}

	/**
	 * Trace-log the class and string form of any object, or "NULL" for null.
	 *
	 * @param suggestion the object to log
	 */
	public static void print(Object suggestion) {
		if (LOG.isTraceEnabled()) {
			LOG.trace(suggestion == null ? "NULL" : suggestion.getClass() + "=" + suggestion.toString());
		}
	}

	/**
	 * Gets the species rows of all genera registered under the genus name.
	 *
	 * @param genus the genus
	 * @return species rows of the genus, empty when the genus is not registered
	 */
	protected List<IGrinSpecies> getAllGenusSpecies(String genus) {
		final List<Long> genusIds = genusIdSpecies.get(genus);
		if (genusIds == null) {
			return Collections.emptyList();
		}

		final List<IGrinSpecies> allSpecies = new ArrayList<>();
		for (Long genusId : genusIds) {
			allSpecies.addAll(speciesLookup.get(genusId));
		}
		return allSpecies;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#containsSpecies(java.lang.String, java.lang.String)
	 */
	@Override
	public boolean containsSpecies(String genus, String species) {
		LOG.trace("Does database contain genus={} species={}", genus, species);

		if (!genusIdSpecies.containsKey(genus)) {
			return false;
		}

		if (isNameHybrid(species)) {
			final String[] split = HYBRID_MARKER_REGEXP.split(species);
			if (split.length < 2) {
				// Marker without a name on both sides (e.g. "alba x "): cannot be a registered hybrid
				LOG.trace("Species {} is a malformed hybrid name", species);
				return false;
			}
			final String speciesLeft = split[0];
			final String speciesRight = split[1];
			LOG.trace("Species {} is a hybrid of {} and {}", species, speciesLeft, speciesRight);
			// Check left and right
			return containsSpecies(genus, speciesLeft) && containsSpecies(genus, speciesRight);
		}

		return getAllGenusSpecies(genus).stream().anyMatch(speciesRow -> matchesSpeciesName(speciesRow, species));
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#findSimilarSpecies(java.lang.String, java.lang.String, int)
	 */
	@Override
	public List<String> findSimilarSpecies(String genus, String species, int maxSize) {
		LOG.debug("Searching similar species for genus={} species={}", genus, species);
		if ("Unknown".equals(genus)) {
			// Don't make suggestions
			return Collections.emptyList();
		}
		if (!genusIdSpecies.containsKey(genus)) {
			throw new UnsupportedOperationException("Genus does not exist in database. Genus=" + genus);
		}

		if (isNameHybrid(species)) {
			final String[] split = HYBRID_MARKER_REGEXP.split(species);
			if (split.length >= 2) {
				LOG.trace("Species {} is a hybrid of {} and {}", species, split[0], split[1]);
				return findSimilarHybridSpecies(genus, split[0], split[1], maxSize);
			}
			// Marker without a name on both sides: fall through and match the name as provided
			LOG.trace("Species {} is a malformed hybrid name", species);
		}

		return makeSuggestions(genus, species)
				// convert sorted Suggestion list back to Strings
				.map(Suggestion::getSuggestion)
				// keep species name
				.map(IGrinSpecies::getSpeciesName)
				// keep distinct elements
				.distinct()
				// limit to maxSize
				.limit(maxSize)
				// convert to List<String>
				.collect(Collectors.toList());
	}

	/**
	 * Suggest hybrid names by combining the suggestions for the left and right part of a hybrid.
	 * 
	 * @param genus genus
	 * @param speciesLeft name on the left of the hybrid marker
	 * @param speciesRight name on the right of the hybrid marker
	 * @param maxSize maximum number of suggestions per part and in the result
	 * @return distinct hybrid names ordered by descending score
	 */
	private List<String> findSimilarHybridSpecies(String genus, String speciesLeft, String speciesRight, int maxSize) {
		final List<Suggestion<String>> lefts = suggestSpeciesNames(genus, speciesLeft, maxSize);
		final List<Suggestion<String>> rights = suggestSpeciesNames(genus, speciesRight, maxSize);

		// Inject the provided species name when the best counterpart is not a 100% match
		if (lefts.isEmpty() && !rights.isEmpty() && rights.get(0).getScore() < 1) {
			lefts.add(new Suggestion<String>(speciesLeft, 0.1));
		} else if (rights.isEmpty() && !lefts.isEmpty() && lefts.get(0).getScore() < 1) {
			rights.add(new Suggestion<String>(speciesRight, 0.1));
		}

		if (LOG.isTraceEnabled()) {
			LOG.trace("Left for {} is {}", speciesLeft, lefts);
			LOG.trace("Right for {} is {}", speciesRight, rights);
		}

		return crossJoinSpecies(lefts, rights)
				// stream results
				.stream()
				// order by score
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// convert sorted Suggestion list back to Strings
				.map(Suggestion::getSuggestion)
				// keep distinct elements
				.distinct()
				// limit to maxSize
				.limit(maxSize)
				// convert to List<String>
				.collect(Collectors.toList());
	}

	/**
	 * Best species name suggestions for one part of a hybrid, best first.
	 * 
	 * @param genus genus
	 * @param species species name to match
	 * @param maxSize maximum number of suggestions
	 * @return modifiable list of scored species names
	 */
	private List<Suggestion<String>> suggestSpeciesNames(String genus, String species, int maxSize) {
		return makeSuggestions(genus, species).limit(maxSize)
				.map(scored -> new Suggestion<String>(scored.getSuggestion().getSpeciesName(), scored.getScore()))
				// the caller may append to the list, so it must be modifiable
				.collect(Collectors.toCollection(ArrayList::new));
	}

	/**
	 * Create a list of Suggestions pairing each one in left with each one in right. The score of the joined suggestion is a product of both scores.
	 * 
	 * @param lefts left suggestions
	 * @param rights right suggestions
	 * @return List of cross-joined suggestions from both lists
	 */
	private List<Suggestion<String>> crossJoinSpecies(List<Suggestion<String>> lefts, List<Suggestion<String>> rights) {
		final List<Suggestion<String>> crossJoin = new ArrayList<>(lefts.size() * rights.size());
		for (Suggestion<String> l : lefts) {
			for (Suggestion<String> r : rights) {
				final Suggestion<String> j = new Suggestion<>(l.getSuggestion().concat(HYBRID_MARKER).concat(r.getSuggestion()), l.getScore() * r.getScore());
				if (LOG.isTraceEnabled()) {
					LOG.trace("Cross-join '{}' with '{}' result={}", l, r, j);
				}
				crossJoin.add(j);
			}
		}
		return crossJoin;
	}

	/**
	 * Return a stream of Suggestion for genus and species, ordered by descending score.
	 * 
	 * @param genus genus
	 * @param species species
	 * @return stream of best Suggestions
	 */
	private Stream<Suggestion<IGrinSpecies>> makeSuggestions(final String genus, final String species) {
		final BestScore bestScore = new BestScore();

		return getAllGenusSpecies(genus).stream()
				// convert each candidate to a Suggestion
				.map(speciesRow -> new Suggestion<IGrinSpecies>(speciesRow, similarityScore(species, speciesRow.getSpeciesName())))
				// keep suggestions with decent score
				.filter(scored -> scored.getScore() >= MIN_SIMILARITY)
				// if record is not current, reduce score
				.peek(InMemoryTaxonomyDatabase::penalizeNotCurrent)
				// sort suggestions by score, descending
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// generate bestScore: the first element after sorting carries the best score
				.peek(scored -> bestScore.update(scored.getScore()))
				// keep suggestions close enough to the best one
				.filter(scored -> isCloseToBest(scored.getScore(), bestScore.getBestScore()));
	}

	/**
	 * Reduce the score of a suggestion when its species row is not current.
	 * 
	 * @param scored the suggestion to adjust in place
	 */
	private static void penalizeNotCurrent(Suggestion<IGrinSpecies> scored) {
		scored.setScore((scored.getSuggestion().isCurrent() ? 1.0 : NOT_CURRENT_PENALTY) * scored.getScore());
	}

	/**
	 * Decide if a score is close enough to the best score to be suggested: at least {@link #FULL_MATCH_MARGIN} when the
	 * best score is a full match, otherwise at least best * {@link #NONMATCH_MARGIN}.
	 * 
	 * @param score the score to test
	 * @param best the best score seen
	 * @return true when the score is within the margin
	 */
	private static boolean isCloseToBest(double score, double best) {
		return score >= (best == 1.0 ? FULL_MATCH_MARGIN : (best * NONMATCH_MARGIN));
	}

	/**
	 * Test if the species row matches the species name. For a specific hybrid name (starting with "x ") the row must
	 * be flagged as specific hybrid and its name must equal the remainder of the name, ignoring surrounding
	 * whitespace. Otherwise the names must be equal.
	 * 
	 * @param speciesRow the row to test
	 * @param species the species name to look for
	 * @return true when the row matches
	 */
	private static boolean matchesSpeciesName(IGrinSpecies speciesRow, String species) {
		if (isNameSpecificHybrid(species)) {
			final String epithet = species.substring(HYBRID_SPECIES_PREFIX.length()).trim();
			return "Y".equals(speciesRow.getIsSpecificHybrid()) && StringUtils.equals(epithet, speciesRow.getSpeciesName());
		}
		return StringUtils.equals(species, speciesRow.getSpeciesName());
	}

	/**
	 * similarityScore returns a string similarity value in the range [0, 1.0] (where 1.0 is full match). Comparison
	 * is case-insensitive and independent of the default locale.
	 *
	 * @param original the original
	 * @param candidate the candidate
	 * @return the score between 0 and 1.0 where 0 is no similarity and 1.0 is full match; 0 when either argument is
	 *         null
	 */
	@Override
	public double similarityScore(final String original, final String candidate) {
		if (original == null || candidate == null) {
			return 0.0;
		}
		// Locale.ROOT: the default locale may map letters differently (e.g. Turkish dotless i)
		final String left = original.toLowerCase(Locale.ROOT);
		final String right = candidate.toLowerCase(Locale.ROOT);
		return (StringSimilarity.diceCoefficientOptimized(left, right) + StringSimilarity.getLevenshteinCoefficient(left, right)) / 2.0f;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#getSpeciesAuthority(java.lang.String,
	 * java.lang.String)
	 */
	@Override
	public String getSpeciesAuthority(String genus, String species) {
		if (!genusIdSpecies.containsKey(genus)) {
			return null;
		}

		return getAllGenusSpecies(genus).stream()
				// keep rows with matching species
				.filter(speciesRow -> matchesSpeciesName(speciesRow, species))
				// debug print
				.peek(speciesRow -> LOG.trace("Species authority {}", speciesRow.getSpeciesAuthority()))
				// keep first match only
				.findFirst()
				// to String or null
				.map(IGrinSpecies::getSpeciesAuthority).orElse(null);
	}

	/**
	 * Check if the name starts with "x "
	 * 
	 * @param name the name to check
	 * @return true if name denotes a specific hybrid
	 */
	private static boolean isNameSpecificHybrid(String name) {
		return StringUtils.startsWith(name, HYBRID_SPECIES_PREFIX);
	}

	/**
	 * Check if name contains " x " or " X "
	 * 
	 * @param name the name to check
	 * @return true when name denotes a hybrid
	 */
	private static boolean isNameHybrid(String name) {
		for (String marker : HYBRID_MARKER_ALT) {
			if (StringUtils.contains(name, marker)) {
				return true;
			}
		}
		return false;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#containsSubtaxa(java.lang.String, java.lang.String, java.lang.String)
	 */
	@Override
	public boolean containsSubtaxa(String genus, String species, String subtaxa) {
		LOG.trace("Does database contain genus={} species={} subtaxa={}", genus, species, subtaxa);

		if (!genusIdSpecies.containsKey(genus)) {
			return false;
		}

		return getAllGenusSpecies(genus).stream()
				// keep matching speciesRows within genus
				.filter(speciesRow -> StringUtils.equals(species, speciesRow.getSpeciesName()))
				// return true if one row has exact match on SUBTAXA
				.anyMatch(speciesRow -> StringUtils.equals(subtaxa, speciesRow.getSubtaxa()));
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#findSimilarSubtaxa(java.lang.String, java.lang.String, java.lang.String, int)
	 */
	@Override
	public List<String> findSimilarSubtaxa(String genus, String species, String subtaxa, int maxSize) {
		LOG.debug("Searching similar subtaxa for genus={} species={} subtaxa={}", genus, species, subtaxa);

		if (StringUtils.isBlank(subtaxa)) {
			return List.of();
		}

		if (!genusIdSpecies.containsKey(genus)) {
			throw new UnsupportedOperationException("Genus does not exist in database. Genus=" + genus);
		}

		final BestScore bestScore = new BestScore();

		return getAllGenusSpecies(genus).stream()
				// keep rows of the species; null-safe like the other lookups
				.filter(speciesRow -> StringUtils.equals(species, speciesRow.getSpeciesName()))
				// keep candidates that are not blank
				.filter(speciesRow -> StringUtils.isNotBlank(speciesRow.getSubtaxa()))
				// convert to Suggestions
				.map(speciesRow -> new Suggestion<IGrinSpecies>(speciesRow, similarityScore(subtaxa, speciesRow.getSubtaxa())))
				// keep suggestions with decent score
				.filter(scored -> scored.getScore() >= MIN_SIMILARITY)
				// if record is not current, reduce score
				.peek(InMemoryTaxonomyDatabase::penalizeNotCurrent)
				// sort by score descending
				.sorted(Comparator.comparing(Suggestion::getScore, Comparator.reverseOrder()))
				// update best score: the first element after sorting carries the best score
				.peek(scored -> bestScore.update(scored.getScore()))
				// keep only nice Suggestions
				.filter(scored -> isCloseToBest(scored.getScore(), bestScore.getBestScore()))
				// back to species rows
				.map(Suggestion::getSuggestion)
				// keep subtaxa
				.map(IGrinSpecies::getSubtaxa)
				// keep distinct
				.distinct()
				// limit results to maxSize
				.limit(maxSize).collect(Collectors.toList());
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#getSubtaxaAuthority(java.lang.String, java.lang.String, java.lang.String)
	 */
	@Override
	public String getSubtaxaAuthority(String genus, String species, String subtaxa) {
		if (!genusIdSpecies.containsKey(genus)) {
			return null;
		}

		return getAllGenusSpecies(genus).stream()
				// keep rows with matching species
				.filter(speciesRow -> StringUtils.equals(species, speciesRow.getSpeciesName()))
				// keep rows with matching subtaxa
				.filter(speciesRow -> StringUtils.equals(subtaxa, speciesRow.getSubtaxa()))
				// debug print
				.peek(speciesRow -> LOG.trace("Subtaxa authority {}", speciesRow.getSubtaxaAuthority()))
				// keep first match only
				.findFirst()
				// to String or null
				.map(IGrinSpecies::getSubtaxaAuthority).orElse(null);
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see org.genesys.taxonomy.checker.TaxonomyDatabase#listSpecies(java.lang.String, java.lang.String, int)
	 */
	@Override
	public List<IGrinSpecies> listSpecies(String genus, String species, int maxSize) {
		return getAllGenusSpecies(genus).stream()
				// keep rows with matching species
				.filter(speciesRow -> StringUtils.equals(species, speciesRow.getSpeciesName()))
				// limit results to maxSize
				.limit(maxSize).collect(Collectors.toList());
	}

	/**
	 * Find species rows of the genus that match the species name and subtaxa. A blank subtaxa matches rows with blank
	 * subtaxa; rows are also kept when {@code subtaxaMatches} of the row accepts the subtaxa.
	 *
	 * @param genus the genus
	 * @param species the species name, may carry the "x " specific hybrid prefix
	 * @param subtaxa the subtaxa, may be blank
	 * @return matching species rows, empty when there are none
	 */
	@Override
	public List<IGrinSpecies> findSpeciesRow(String genus, String species, String subtaxa) {
		// Treat null, empty and whitespace-only subtaxa alike
		final String wantedSubtaxa = StringUtils.defaultIfBlank(subtaxa, null);

		return getAllGenusSpecies(genus).stream()
				// keep rows with matching species
				.filter(speciesRow -> matchesSpeciesName(speciesRow, species))
				// match subtaxa
				.filter(speciesRow -> StringUtils.equals(wantedSubtaxa, StringUtils.defaultIfBlank(speciesRow.getSubtaxa(), null)) || speciesRow.subtaxaMatches(subtaxa))
				// to list
				.collect(Collectors.toList());
	}

	/**
	 * Get the species row by its id.
	 *
	 * @param speciesId the species id
	 * @return the registered row or null when the id is not registered
	 */
	@Override
	public IGrinSpecies getSpeciesRow(long speciesId) {
		return speciesIdLookup.get(speciesId);
	}

	/**
	 * Get the genus name by genus id.
	 *
	 * @param genusId the genus id
	 * @return the genus name or null when the id is not registered
	 */
	@Override
	public String getGenus(long genusId) {
		return genusIdLookup.get(genusId);
	}
}
