/*
 * Decompiled with CFR 0.152.
 */
package com.efficient.common.util;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.efficient.common.entity.HotWord;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
import com.hankcs.hanlp.mining.word2vec.WordVectorModel;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Static helpers built on HanLP: hot-word (term frequency) analysis and two
 * sentence-similarity measures (word2vec document vectors and bag-of-words cosine).
 *
 * <p>Configuration lives in public mutable static fields that are populated by
 * {@link #init(List, List, List, String)}. No synchronization is performed here;
 * callers that re-initialize while other threads are analysing text must
 * coordinate that themselves.
 */
public class HanLPUtil {
    /** Punctuation/special characters. NOTE(review): not referenced anywhere in this class. */
    private static final String FILTER_TERMS = "`~!@#$^&*()=|{}':;',\\[\\].<>/?~\uff01@#\uffe5\u2026\u2026&*\uff08\uff09\u2014\u2014|{}\u3010\u3011\u2018\uff1b\uff1a\u201d\u201c'\u3002\uff0c\u3001\uff1f";
    /** Part-of-speech prefixes installed by {@link #init} when no skipNatureList is supplied. */
    private static final String[] DEFAULT_SKIP_NATURES = {"m", "q", "t", "w"};
    /** Exact words that {@link #hotAnalyse(String)} ignores. */
    public static List<String> skipDictList = new ArrayList<String>();
    /** Part-of-speech (nature) prefixes; terms whose nature starts with one of them are ignored. */
    public static List<String> skipNatureList = new ArrayList<String>();
    /** Document vector model used by {@link #vecSimilarity}; {@code null} until {@link #init} loads one. */
    public static DocVectorModel docVectorModel;

    /**
     * Segments {@code content}, drops stop words, skipped natures and skipped words,
     * and counts how often each remaining word occurs.
     *
     * @param content text to analyse
     * @return one {@link HotWord} per distinct word (word, nature, count) in unspecified
     *         order, or {@code null} when {@code content} is blank (kept for existing callers)
     */
    public static List<HotWord> hotAnalyse(String content) {
        if (StrUtil.isBlank(content)) {
            return null;
        }
        List<Term> termList = CoreStopWordDictionary.apply(StandardTokenizer.segment(content));
        Map<String, Integer> wordCounts = new HashMap<>();
        Map<String, String> wordNatures = new HashMap<>();
        List<String> skipWords = skipDictList;
        for (Term term : termList) {
            if (isSkippedNature(term) || skipWords.contains(term.word)) {
                continue;
            }
            // The nature recorded for a word is the one seen on its last occurrence.
            wordNatures.put(term.word, term.nature == null ? null : term.nature.toString());
            wordCounts.merge(term.word, 1, Integer::sum);
        }
        List<HotWord> result = new ArrayList<>(wordCounts.size());
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            HotWord hotWord = new HotWord();
            hotWord.setWord(entry.getKey());
            hotWord.setNature(wordNatures.get(entry.getKey()));
            hotWord.setCount(entry.getValue());
            result.add(hotWord);
        }
        return result;
    }

    /**
     * Computes the similarity of two sentences with the loaded word2vec document model.
     *
     * @param sentence1 first sentence
     * @param sentence2 second sentence
     * @return the value reported by {@link DocVectorModel#similarity}
     * @throws IllegalStateException if no model has been loaded (see {@link #init})
     */
    public static double vecSimilarity(String sentence1, String sentence2) {
        DocVectorModel model = docVectorModel;
        if (model == null) {
            throw new IllegalStateException(
                    "docVectorModel is not initialized; call HanLPUtil.init(...) with a word2vec model path first");
        }
        return model.similarity(sentence1, sentence2);
    }

    /**
     * Configures the utility.
     *
     * @param includeDictList words to register in HanLP's {@link CustomDictionary}; may be null/empty
     * @param skipDictList    words to ignore in {@link #hotAnalyse}; when null/empty the current list is kept
     * @param skipNatureList  nature prefixes to ignore; when null/empty the defaults
     *                        {@code m, q, t, w} are merged into the current list
     * @param word2vecPath    path of a word2vec model for {@link #vecSimilarity}; blank leaves the model untouched
     * @throws IOException if the word2vec model cannot be loaded
     */
    public static void init(List<String> includeDictList, List<String> skipDictList, List<String> skipNatureList, String word2vecPath) throws IOException {
        if (CollUtil.isNotEmpty(includeDictList)) {
            includeDictList.forEach(CustomDictionary::add);
        }
        if (CollUtil.isNotEmpty(skipDictList)) {
            // Copy so later changes to the caller's list do not leak into shared state.
            HanLPUtil.skipDictList = new ArrayList<>(skipDictList);
        }
        if (CollUtil.isNotEmpty(skipNatureList)) {
            HanLPUtil.skipNatureList = new ArrayList<>(skipNatureList);
        } else {
            // Build a fresh list instead of add()-ing to the installed one: this keeps
            // repeated init calls from accumulating duplicates and works even if the
            // installed list is unmodifiable.
            List<String> current = HanLPUtil.skipNatureList;
            List<String> natures = current == null ? new ArrayList<String>() : new ArrayList<>(current);
            for (String nature : DEFAULT_SKIP_NATURES) {
                if (!natures.contains(nature)) {
                    natures.add(nature);
                }
            }
            HanLPUtil.skipNatureList = natures;
        }
        if (StrUtil.isNotBlank(word2vecPath)) {
            docVectorModel = new DocVectorModel(new WordVectorModel(word2vecPath));
        }
    }

    /**
     * Cosine similarity of the word-frequency vectors of two sentences.
     *
     * @param sentence1 first sentence; {@code null} is treated as containing no words
     * @param sentence2 second sentence; {@code null} is treated as containing no words
     * @return the cosine of the two frequency vectors, or {@code 0.0} when either sentence
     *         has no words left after filtering (instead of {@code NaN} from 0/0)
     */
    public static double getSimilarity(String sentence1, String sentence2) {
        List<String> sent1Words = HanLPUtil.getSplitWords(sentence1);
        List<String> sent2Words = HanLPUtil.getSplitWords(sentence2);
        List<String> allWords = HanLPUtil.mergeList(sent1Words, sent2Words);
        int[] statistic1 = HanLPUtil.statistic(allWords, sent1Words);
        int[] statistic2 = HanLPUtil.statistic(allWords, sent2Words);
        double dotProduct = 0.0;
        double squaredNorm1 = 0.0;
        double squaredNorm2 = 0.0;
        for (int i = 0; i < statistic1.length; ++i) {
            // Widen before multiplying so large counts cannot overflow int.
            double count1 = statistic1[i];
            double count2 = statistic2[i];
            dotProduct += count1 * count2;
            squaredNorm1 += count1 * count1;
            squaredNorm2 += count2 * count2;
        }
        double denominator = Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2);
        if (denominator == 0.0) {
            return 0.0;
        }
        return dotProduct / denominator;
    }

    /** Segments a sentence and returns its words, excluding terms with a skipped nature. */
    private static List<String> getSplitWords(String sentence) {
        List<String> words = new ArrayList<>();
        if (sentence == null || sentence.isEmpty()) {
            return words;
        }
        for (Term term : HanLP.segment(sentence)) {
            if (!isSkippedNature(term)) {
                words.add(term.word);
            }
        }
        return words;
    }

    /**
     * Tells whether the term's nature starts with any configured skip prefix.
     * Shared by hot-word analysis and similarity so both apply the same filter.
     */
    private static boolean isSkippedNature(Term term) {
        if (term.nature == null) {
            return false;
        }
        for (String prefix : skipNatureList) {
            if (prefix != null && term.nature.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /** Returns the distinct words of both lists, in first-occurrence order. */
    private static List<String> mergeList(List<String> list1, List<String> list2) {
        LinkedHashSet<String> distinct = new LinkedHashSet<>(list1);
        distinct.addAll(list2);
        return new ArrayList<>(distinct);
    }

    /** For each entry of {@code allWords}, counts its occurrences in {@code sentWords}. */
    private static int[] statistic(List<String> allWords, List<String> sentWords) {
        // Count once up front rather than rescanning sentWords for every vocabulary word.
        Map<String, Integer> frequencies = new HashMap<>();
        for (String word : sentWords) {
            frequencies.merge(word, 1, Integer::sum);
        }
        int[] result = new int[allWords.size()];
        for (int i = 0; i < result.length; ++i) {
            Integer count = frequencies.get(allWords.get(i));
            result[i] = count == null ? 0 : count;
        }
        return result;
    }
}

