/*
 * Decompiled with CFR 0.152.
 */
package org.molgenis.data.semanticsearch.service.impl;

import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.molgenis.MolgenisFieldTypes;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.EntityMetaData;
import org.molgenis.data.MolgenisDataAccessException;
import org.molgenis.data.QueryRule;
import org.molgenis.data.semanticsearch.string.NGramDistanceAlgorithm;
import org.molgenis.data.semanticsearch.string.Stemmer;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.service.OntologyService;
import org.molgenis.ontology.ic.TermFrequencyService;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * Helper that turns free-text search terms and ontology terms into {@link QueryRule} trees
 * (DIS_MAX / SHOULD / FUZZY_MATCH on the {@code label} and {@code description} fields) and
 * provides the text pre-processing (stop-word removal, boosting, escaping) those queries need.
 */
public class SemanticSearchServiceHelper {
    private final TermFrequencyService termFrequencyService;
    private final DataService dataService;
    private final OntologyService ontologyService;
    private final Stemmer stemmer = new Stemmer();
    public static final int MAX_NUM_TAGS = 3;
    private static final char SPACE_CHAR = ' ';
    private static final String COMMA_CHAR = ",";
    private static final String CARET_CHARACTER = "^";
    private static final String ESCAPED_CARET_CHARACTER = "\\^";
    private static final String ILLEGAL_CHARS_REGEX = "[^\\p{L}'a-zA-Z0-9\\.~]+";
    /** Precompiled form of {@link #ILLEGAL_CHARS_REGEX}, so the regex is not recompiled on every split. */
    private static final Pattern ILLEGAL_CHARS_PATTERN = Pattern.compile(ILLEGAL_CHARS_REGEX);

    /**
     * Creates the helper.
     *
     * @param dataService          used to look up entity meta data entities; must not be null
     * @param ontologyService      used to resolve ontology terms, their children and distances; must not be null
     * @param termFrequencyService used to look up term frequencies for boosting; must not be null
     * @throws NullPointerException if any argument is null
     */
    @Autowired
    public SemanticSearchServiceHelper(DataService dataService, OntologyService ontologyService, TermFrequencyService termFrequencyService) {
        this.dataService = Objects.requireNonNull(dataService);
        this.ontologyService = Objects.requireNonNull(ontologyService);
        this.termFrequencyService = Objects.requireNonNull(termFrequencyService);
    }

    /**
     * Builds a DIS_MAX rule from the given search terms and ontology terms.
     *
     * <p>Ontology terms whose IRI contains no comma contribute their label/synonym queries to the
     * DIS_MAX term list. Ontology terms whose IRI contains a comma are treated as a combination of
     * several IRIs and are appended as nested SHOULD rules.
     *
     * @param searchTerms   free-text terms; may be null, blank entries are ignored
     * @param ontologyTerms ontology terms to expand; must not be null
     * @return the DIS_MAX rule, with one nested SHOULD rule per comma-separated ontology term
     */
    public QueryRule createDisMaxQueryRuleForAttribute(Set<String> searchTerms, Collection<OntologyTerm> ontologyTerms) {
        List<String> queryTerms = new ArrayList<>();
        if (searchTerms != null) {
            searchTerms.stream()
                    .filter(StringUtils::isNotBlank)
                    .map(this::processQueryString)
                    .forEach(queryTerms::add);
        }
        // First pass: plain (single-IRI) ontology terms feed the flat DIS_MAX term list.
        for (OntologyTerm ontologyTerm : ontologyTerms) {
            if (!ontologyTerm.getIRI().contains(COMMA_CHAR)) {
                queryTerms.addAll(parseOntologyTermQueries(ontologyTerm));
            }
        }
        QueryRule disMaxQueryRule = createDisMaxQueryRuleForTerms(queryTerms);
        // Second pass: combined (comma-separated IRI) ontology terms become nested SHOULD rules.
        for (OntologyTerm ontologyTerm : ontologyTerms) {
            if (ontologyTerm.getIRI().contains(COMMA_CHAR)) {
                disMaxQueryRule.getNestedRules().add(createShouldQueryRule(ontologyTerm.getIRI()));
            }
        }
        return disMaxQueryRule;
    }

    /**
     * Builds a DIS_MAX rule containing, for every non-empty query term, one FUZZY_MATCH rule on
     * {@code label} and one on {@code description}. Terms are escaped for the query parser first,
     * except that the caret (boost) character is kept.
     *
     * @param queryTerms the query strings; empty strings are skipped
     * @return a rule with operator DIS_MAX wrapping the generated FUZZY_MATCH rules
     */
    public QueryRule createDisMaxQueryRuleForTerms(List<String> queryTerms) {
        List<QueryRule> rules = new ArrayList<>();
        queryTerms.stream()
                .filter(StringUtils::isNotEmpty)
                .map(this::escapeCharsExcludingCaretChar)
                .forEach(query -> {
                    rules.add(new QueryRule("label", QueryRule.Operator.FUZZY_MATCH, query));
                    rules.add(new QueryRule("description", QueryRule.Operator.FUZZY_MATCH, query));
                });
        QueryRule finalDisMaxQuery = new QueryRule(rules);
        finalDisMaxQuery.setOperator(QueryRule.Operator.DIS_MAX);
        return finalDisMaxQuery;
    }

    /**
     * Same as {@link #createDisMaxQueryRuleForTerms(List)}, additionally storing {@code boostValue}
     * as the value of the resulting rule when it is usable.
     *
     * @param queryTerms the query strings
     * @param boostValue the boost to attach; ignored when null or when its integer part is zero
     * @return the (possibly boosted) DIS_MAX rule
     */
    public QueryRule createBoostedDisMaxQueryRuleForTerms(List<String> queryTerms, Double boostValue) {
        QueryRule finalDisMaxQuery = createDisMaxQueryRuleForTerms(queryTerms);
        // NOTE(review): intValue() truncates, so any boost strictly between -1 and 1 is treated as
        // zero and dropped. Kept as-is for backward compatibility -- confirm this is intended.
        if (boostValue != null && boostValue.intValue() != 0) {
            finalDisMaxQuery.setValue(boostValue);
        }
        return finalDisMaxQuery;
    }

    /**
     * Builds a SHOULD rule for a combined ontology term: the IRI string is split on commas, each
     * IRI is resolved through the ontology service, and every resolved term contributes one boosted
     * DIS_MAX rule. IRIs that cannot be resolved are skipped.
     *
     * @param multiOntologyTermIri comma-separated ontology term IRIs
     * @return a rule with operator SHOULD containing one nested rule per resolved ontology term
     */
    public QueryRule createShouldQueryRule(String multiOntologyTermIri) {
        QueryRule shouldQueryRule = new QueryRule(new ArrayList<QueryRule>());
        shouldQueryRule.setOperator(QueryRule.Operator.SHOULD);
        for (String ontologyTermIri : multiOntologyTermIri.split(COMMA_CHAR)) {
            OntologyTerm ontologyTerm = ontologyService.getOntologyTerm(ontologyTermIri);
            if (ontologyTerm == null) {
                // Same tolerance as collectOntologyTermQueryMap: an unknown IRI must not abort the query.
                continue;
            }
            List<String> queryTerms = parseOntologyTermQueries(ontologyTerm);
            Double termFrequency = getBestInverseDocumentFrequency(queryTerms);
            shouldQueryRule.getNestedRules().add(createBoostedDisMaxQueryRuleForTerms(queryTerms, termFrequency));
        }
        return shouldQueryRule;
    }

    /**
     * Produces the query strings for an ontology term: its own label and synonyms (stop words
     * removed), followed by the label and synonyms of each child term, where every word is boosted
     * by {@code 0.5 ^ distance(term, child)}.
     *
     * @param ontologyTerm the term to expand; must not be null
     * @return a new mutable list of query strings
     */
    public List<String> parseOntologyTermQueries(OntologyTerm ontologyTerm) {
        // Collect into an explicit ArrayList: Collectors.toList() gives no mutability guarantee,
        // and the list is appended to below.
        List<String> queryTerms = getOtLabelAndSynonyms(ontologyTerm).stream()
                .map(this::processQueryString)
                .collect(Collectors.toCollection(ArrayList::new));
        for (OntologyTerm childOt : ontologyService.getChildren(ontologyTerm)) {
            double boostedNumber = Math.pow(0.5, ontologyService.getOntologyTermDistance(ontologyTerm, childOt).intValue());
            for (String synonym : getOtLabelAndSynonyms(childOt)) {
                queryTerms.add(parseBoostQueryString(synonym, boostedNumber));
            }
        }
        return queryTerms;
    }

    /**
     * Returns the synonyms of the ontology term followed by its label, without duplicates and in
     * insertion order.
     *
     * @param ontologyTerm the term to read; must not be null
     * @return a new set holding the synonyms and the label
     */
    public Set<String> getOtLabelAndSynonyms(OntologyTerm ontologyTerm) {
        Set<String> allTerms = Sets.newLinkedHashSet(ontologyTerm.getSynonyms());
        allTerms.add(ontologyTerm.getLabel());
        return allTerms;
    }

    /**
     * Builds a map from stemmed phrase to the original text it came from. Non-blank query terms map
     * to themselves; labels and synonyms of ontology terms (and of their children) map to the label
     * of the ontology term. Ontology terms with a comma-separated IRI are resolved IRI by IRI.
     *
     * @param queryTerms    free-text query terms; must not be null
     * @param ontologyTerms ontology terms to expand; must not be null
     * @return an insertion-ordered map of stemmed phrase to source text
     */
    public Map<String, String> collectExpandedQueryMap(Set<String> queryTerms, Collection<OntologyTerm> ontologyTerms) {
        Map<String, String> expandedQueryMap = new LinkedHashMap<>();
        for (String queryTerm : queryTerms) {
            if (StringUtils.isNotBlank(queryTerm)) {
                expandedQueryMap.put(Stemmer.cleanStemPhrase(queryTerm), queryTerm);
            }
        }
        for (OntologyTerm ontologyTerm : ontologyTerms) {
            String iri = ontologyTerm.getIRI();
            if (iri.contains(COMMA_CHAR)) {
                for (String ontologyTermIri : iri.split(COMMA_CHAR)) {
                    collectOntologyTermQueryMap(expandedQueryMap, ontologyService.getOntologyTerm(ontologyTermIri));
                }
            } else {
                collectOntologyTermQueryMap(expandedQueryMap, ontologyTerm);
            }
        }
        return expandedQueryMap;
    }

    /**
     * Adds the stemmed label and synonyms of the ontology term and of each of its children to the
     * map, all pointing at the label of {@code ontologyTerm}. Does nothing when the term is null.
     *
     * @param expanedQueryMap the map to add entries to
     * @param ontologyTerm    the term to expand; may be null
     */
    public void collectOntologyTermQueryMap(Map<String, String> expanedQueryMap, OntologyTerm ontologyTerm) {
        if (ontologyTerm == null) {
            return;
        }
        String label = ontologyTerm.getLabel();
        for (String term : getOtLabelAndSynonyms(ontologyTerm)) {
            expanedQueryMap.put(Stemmer.cleanStemPhrase(term), label);
        }
        for (OntologyTerm childOntologyTerm : ontologyService.getChildren(ontologyTerm)) {
            for (String term : getOtLabelAndSynonyms(childOntologyTerm)) {
                // Children are deliberately mapped to the parent's label.
                expanedQueryMap.put(Stemmer.cleanStemPhrase(term), label);
            }
        }
    }

    /**
     * Collects the identifiers of all non-compound attributes of the given entity meta data,
     * descending into attribute parts.
     *
     * @param sourceEntityMetaData the meta data whose attributes are listed
     * @return the attribute identifiers in traversal order
     * @throws MolgenisDataAccessException if no entity with that full name is found
     */
    public List<String> getAttributeIdentifiers(EntityMetaData sourceEntityMetaData) {
        Entity entityMetaDataEntity = dataService.findOne("entities", new QueryImpl().eq("fullName", sourceEntityMetaData.getName()));
        if (entityMetaDataEntity == null) {
            throw new MolgenisDataAccessException("Could not find EntityMetaDataEntity by the name of " + sourceEntityMetaData.getName());
        }
        List<String> attributeIdentifiers = new ArrayList<>();
        recursivelyCollectAttributeIdentifiers(entityMetaDataEntity.getEntities("attributes"), attributeIdentifiers);
        return attributeIdentifiers;
    }

    /** Depth-first walk that appends the identifier of every non-compound attribute to the list. */
    private void recursivelyCollectAttributeIdentifiers(Iterable<Entity> attributeEntities, List<String> attributeIdentifiers) {
        for (Entity attributeEntity : attributeEntities) {
            if (!attributeEntity.getString("dataType").equals(MolgenisFieldTypes.COMPOUND.toString())) {
                attributeIdentifiers.add(attributeEntity.getString("identifier"));
            }
            Iterable<Entity> parts = attributeEntity.getEntities("parts");
            if (parts != null) {
                recursivelyCollectAttributeIdentifiers(parts, attributeIdentifiers);
            }
        }
    }

    /**
     * Looks up ontology terms matching the words of the description (stop words removed) in the
     * given ontologies, asking the ontology service for at most {@link #MAX_NUM_TAGS} results.
     *
     * @param description the text to find tags for
     * @param ontologyIds the ontologies to search
     * @return the ontology terms returned by the ontology service
     */
    public List<OntologyTerm> findTags(String description, List<String> ontologyIds) {
        Set<String> searchTerms = removeStopWords(description);
        return ontologyService.findOntologyTerms(ontologyIds, searchTerms, MAX_NUM_TAGS);
    }

    /**
     * Removes stop words from the query string and joins the remaining words with single spaces.
     *
     * @param queryString the raw query string
     * @return the remaining lower-cased words, space separated
     */
    public String processQueryString(String queryString) {
        return StringUtils.join(removeStopWords(queryString), SPACE_CHAR);
    }

    /**
     * Removes stop words from the query string and appends {@code ^boost} to every remaining word.
     *
     * @param queryString the raw query string
     * @param boost       the boost value appended to each word
     * @return the boosted words, space separated
     */
    public String parseBoostQueryString(String queryString, double boost) {
        Set<String> boostedWords = removeStopWords(queryString).stream()
                .map(word -> word + CARET_CHARACTER + boost)
                .collect(Collectors.toSet());
        return StringUtils.join(boostedWords, SPACE_CHAR);
    }

    /**
     * Escapes the string with {@link QueryParser#escape(String)} and then restores escaped carets,
     * so boost markers stay effective.
     *
     * @param string the query string to escape
     * @return the escaped string with unescaped caret characters
     */
    public String escapeCharsExcludingCaretChar(String string) {
        return QueryParser.escape(string).replace(ESCAPED_CARET_CHARACTER, CARET_CHARACTER);
    }

    /**
     * Splits the description on runs of characters other than letters, digits, apostrophe, dot and
     * tilde, lower-cases the pieces and drops empty strings and stop words.
     *
     * @param description the text to tokenize; must not be null
     * @return the distinct remaining words (no ordering guarantee)
     */
    public Set<String> removeStopWords(String description) {
        return Arrays.stream(ILLEGAL_CHARS_PATTERN.split(description))
                // Locale.ROOT keeps lower-casing independent of the JVM default locale
                // (e.g. Turkish dotless i), so stop-word matching stays stable.
                .map(word -> word.toLowerCase(Locale.ROOT))
                .filter(word -> StringUtils.isNotEmpty(word) && !NGramDistanceAlgorithm.STOPWORDSLIST.contains(word))
                .collect(Collectors.toSet());
    }

    /**
     * Returns the term frequency of the shortest term (the first one on ties), or null when the
     * list is empty.
     */
    private Double getBestInverseDocumentFrequency(List<String> terms) {
        Optional<String> shortestTerm = terms.stream().min(Comparator.comparingInt(String::length));
        return shortestTerm.isPresent() ? termFrequencyService.getTermFrequency(shortestTerm.get()) : null;
    }
}

