package com.walker.semantics.support;

import com.walker.infrastructure.utils.StringUtils;
import com.walker.semantics.ExtractorException;
import com.walker.semantics.InputWord;
import com.walker.semantics.SemanticsManager;
import com.walker.semantics.SummaryExtractor;
import com.walker.semantics.SummaryMeta;
import com.walker.semantics.SummaryQuery;
import com.walker.semantics.WordKey;
import org.ansj.app.keyword.KeyWordComputer;
import org.ansj.app.keyword.Keyword;
import org.ansj.app.summary.SummaryComputer;
import org.ansj.app.summary.TagContent;
import org.ansj.app.summary.pojo.Summary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Base {@link SummaryExtractor} that builds a {@link SummaryMeta} from a
 * {@link SummaryQuery} using ansj's {@link SummaryComputer}.
 *
 * <p>When the query carries no reference keywords, they are derived from the
 * content itself via {@link KeyWordComputer}, keeping only the leading
 * {@link #getKeywordPercent() keywordPercent} share of the computed keywords.
 */
public abstract class AbstractSummaryExtractor implements SummaryExtractor {

    protected final transient Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Fraction of the computed keywords that is used as reference keywords.
     * The default is 0.4, i.e. the leading 40% (the first keyword is always kept).
     */
    private float keywordPercent = 0.4f;

    private SemanticsManager semanticsManager;

    /**
     * Extracts title, summary, scored keywords and (optionally) a tagged summary
     * from the content of the given query.
     *
     * @param query extraction conditions; must be non-null, carry non-empty
     *              content and a minimum keyword score greater than 0
     * @return the populated summary meta
     * @throws ExtractorException if the query is missing or invalid, if the content
     *                            yields three or fewer word metas, or if the computed
     *                            summary has at most one keyword
     */
    @Override
    public SummaryMeta extract(SummaryQuery query) throws ExtractorException {
        if(query == null){
            throw new ExtractorException("未提供抽取条件，无法完成摘要抽取");
        }
        if(StringUtils.isEmpty(query.getContent())){
            throw new ExtractorException("原始抽取素材不存在");
        }
        if(query.getMinScore() <= 0){
            throw new ExtractorException("关键词分值必须大于0");
        }

        String input = query.getContent();
        logger.debug(input);

        // Reject inputs that are too short to summarise meaningfully.
        InputWord inputWord = new InputWord(input);
        if(inputWord.getWordMetaList().size() <= 3){
            throw new ExtractorException("输入内容过少", query.getContent(), true);
        }
        // Parameterized form: the list is only rendered when debug logging is enabled.
        logger.debug("{}", inputWord.getWordMetaList());

        // Prefer caller-supplied keywords; otherwise derive them from the content.
        String referenceKeywords = StringUtils.isNotEmpty(query.getKeywords())
                ? query.getKeywords()
                : this.acquireKeywords(null, input);
        logger.debug("referenceKeywords = {}", referenceKeywords);

        // No title is supplied to the summary computer (second argument is null).
        SummaryComputer summaryComputer = new SummaryComputer(query.getMaxLength(), null, input);
        Summary summary = summaryComputer.toSummary(referenceKeywords);
        List<Keyword> keywordList = summary.getKeyWords();
        if(keywordList == null || keywordList.size() <= 1){
            throw new ExtractorException("输入内容过少", query.getContent(), true);
        }

        // Keep only keywords reaching the minimum score; the title is the plain
        // concatenation of their names, in the order the summary reports them.
        double minScore = query.getMinScore();
        StringBuilder title = new StringBuilder();
        List<WordKey> wordKeyList = new ArrayList<>(keywordList.size());
        for(Keyword keyword : keywordList){
            if(keyword.getScore() >= minScore){
                wordKeyList.add(new WordKey(keyword.getScore(), keyword.getName()));
                title.append(keyword.getName());
            }
        }

        SummaryMeta summaryMeta = new SummaryMeta();
        summaryMeta.setTitle(title.toString());
        summaryMeta.setSummary(summary.getSummary());
        summaryMeta.setWordKeyList(wordKeyList);

        // If both tags are provided, also add the tagged summary content.
        if(StringUtils.isNotEmpty(query.getBeginTag()) && StringUtils.isNotEmpty(query.getEndTag())){
            TagContent tagContent = new TagContent(query.getBeginTag(), query.getEndTag());
            summaryMeta.setTagSummary(tagContent.tagContent(summary));
        }
        return summaryMeta;
    }

    /**
     * Computes keywords for the content and returns the leading ones concatenated
     * into a single string (no separator).
     *
     * <p>The first keyword is always included; further keywords are included while
     * {@code taken / total} stays below {@link #keywordPercent}. The keywords are
     * taken in the iteration order of the collection returned by
     * {@code computeArticleTfidf} (presumably ranked by score — confirm against ansj).
     *
     * @param title   optional reference title; may be {@code null}
     * @param content the original content to analyse
     * @return the concatenated keyword names, or an empty string if none were computed
     */
    private String acquireKeywords(String title, String content){
        // 20 is the argument handed to KeyWordComputer (presumably the keyword limit — confirm).
        KeyWordComputer kwc = new KeyWordComputer(20);
        Collection<Keyword> ranked = kwc.computeArticleTfidf(title, content);
        if(ranked == null || ranked.isEmpty()){
            return StringUtils.EMPTY_STRING;
        }

        final int total = ranked.size();
        StringBuilder sb = new StringBuilder();
        int taken = 0;
        for(Keyword kw : ranked){
            // Stop once the share already taken is no longer below the configured
            // percentage; the very first keyword is exempt from this check.
            if(taken > 0 && !((float) taken / total < keywordPercent)){
                break;
            }
            sb.append(kw.getName());
            taken++;
        }
        return sb.toString();
    }

    @Override
    public void setSemanticsManager(SemanticsManager semanticsManager) {
        this.semanticsManager = semanticsManager;
    }

    /**
     * @return the semantics manager previously set, or {@code null} if none was set
     */
    public SemanticsManager getSemanticsManager() {
        return semanticsManager;
    }

    /**
     * @return the fraction of computed keywords used as reference keywords
     */
    public float getKeywordPercent() {
        return keywordPercent;
    }

    /**
     * Sets the fraction of computed keywords to use as reference keywords; not all
     * keywords are used, only the leading share. No range validation is performed;
     * regardless of the value, the first keyword is always used.
     *
     * @param keywordPercent fraction of keywords to keep (e.g. 0.4 for 40%)
     */
    public void setKeywordPercent(float keywordPercent) {
        this.keywordPercent = keywordPercent;
    }
}
