/*
 * Decompiled with CFR 0.152.
 */
package org.maochen.nlp.sentencetypeclassifier;

import com.google.common.collect.Sets;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import org.maochen.nlp.datastructure.DNode;
import org.maochen.nlp.datastructure.DTree;
import org.maochen.nlp.parser.IParser;
import org.maochen.nlp.parser.stanford.pcfg.StanfordPCFGParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Turns a raw {@code sentence<delimiter>label} entry into a delimiter-separated feature string.
 *
 * <p>Features are emitted as bare keys: a key appears in the output only when its condition
 * holds, and it is repeated {@code weight} times to emphasise it. The n-gram vocabularies are
 * loaded in the constructor from serialized maps found under {@code filepathPrefix}.
 */
public class FeatureExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(FeatureExtractor.class);

    /** POS tags checked on the first and last word (the {@code W*} tags). */
    private static final Set<String> WH_POS_TAGS = Sets.newHashSet("WRB", "WDT", "WP", "WP$");

    /** Lower-case sentence openings that trigger the {@code question_over_head} feature. */
    private static final Set<String> QUESTION_PREFIXES =
            Sets.newHashSet("tell me", "let me know", "clarify for me", "name");

    /** Lemmas that trigger {@code has_imperative_keyword} when the first word is also a root. */
    private static final Set<String> IMPERATIVE_LEMMAS =
            Sets.newHashSet("verify", "ask", "say", "solve", "run", "execute");

    /** Lower bound for the repetition count used by the emphasised features. */
    private static final int MIN_EMPHASIS_WEIGHT = 10;

    final String filepathPrefix;
    IParser parser;
    // NOTE(review): the delimiter is passed to String.split and is therefore interpreted as a
    // regular expression; regex metacharacters such as "|" will not split literally.
    String delimiter;
    boolean isRealFeature = false;
    Map<String, Integer> biGramWordMap = new HashMap<>();
    Map<String, Integer> triGramWordMap = new HashMap<>();
    Map<String, Integer> biGramDepMap = new HashMap<>();
    Map<String, Integer> triGramDepMap = new HashMap<>();

    /**
     * Appends {@code key} followed by the delimiter {@code weight} times when {@code value} is
     * true; does nothing otherwise.
     *
     * @param builder target buffer
     * @param key     feature name to emit
     * @param value   whether the feature fired
     * @param weight  how many times to repeat the feature
     */
    private void addFeats(StringBuilder builder, String key, boolean value, int weight) {
        if (!value) {
            return;
        }
        for (int i = 0; i < weight; ++i) {
            builder.append(key).append(this.delimiter);
        }
    }

    /**
     * Emits one feature per n-gram key that occurs, underscore-bounded, inside {@code haystack}.
     *
     * @param builder     target buffer
     * @param featPrefix  prefix prepended to each n-gram to form the feature name
     * @param nGrams      vocabulary whose keys are tested
     * @param haystack    underscore-joined string to search in
     */
    private void addNGramFeats(StringBuilder builder, String featPrefix,
                               Map<String, Integer> nGrams, String haystack) {
        for (String nGram : nGrams.keySet()) {
            this.addFeats(builder, featPrefix + nGram, haystack.contains("_" + nGram + "_"), 1);
        }
    }

    /**
     * Builds an underscore-joined string of dependency labels in breadth-first order, starting
     * from the first root of {@code tree} and wrapped in {@code _<DEP>_ ... </DEP>_} markers.
     *
     * @param tree dependency tree to walk
     * @return the label string; only the markers when the tree reports no roots
     */
    protected String getDEPString(DTree tree) {
        StringBuilder builder = new StringBuilder();
        builder.append("_<DEP>_");
        // LinkedList (not ArrayDeque) because the queue must tolerate null children.
        LinkedList<DNode> queue = new LinkedList<>();
        if (!tree.getRoots().isEmpty()) {
            queue.add(tree.getRoots().get(0));
        }
        while (!queue.isEmpty()) {
            DNode currentNode = queue.poll();
            if (currentNode == null) {
                continue;
            }
            builder.append(currentNode.getDepLabel()).append("_");
            queue.addAll(currentNode.getChildren());
        }
        builder.append("</DEP>_");
        return builder.toString();
    }

    /**
     * Computes the feature string for a single sentence.
     *
     * <p>Features are appended in a fixed order: word bi/tri-grams, dependency bi/tri-grams,
     * first/last word POS, root-verb start, auxiliary presence, question prefix, imperative
     * keyword, final punctuation and the literal "whether".
     *
     * @param sentence raw sentence text; underscores are treated as spaces
     * @param tree     parse of the sentence; the node at index 1 is read as the first word
     *                 (index 0 is presumably a padding node - confirm against DTree)
     * @return trimmed, delimiter-separated feature keys
     */
    private String generateFeats(String sentence, DTree tree) {
        StringBuilder builder = new StringBuilder();
        sentence = sentence.trim().replaceAll("_", " ");
        int sentenceLength = sentence.split("\\s").length;
        // Emphasised features scale with sentence length so they are not drowned out by n-grams.
        int weight = Math.max(sentenceLength, MIN_EMPHASIS_WEIGHT);
        // Locale.ROOT keeps lower-casing independent of the JVM default locale.
        String lowered = sentence.toLowerCase(Locale.ROOT);

        String inputWithTag = " <sentence> " + lowered.replaceAll("\\p{Punct}*$", "") + " </sentence> ";
        inputWithTag = inputWithTag.replaceAll("\\s", "_");
        this.addNGramFeats(builder, "biGramWord_", this.biGramWordMap, inputWithTag);
        this.addNGramFeats(builder, "triGramWord_", this.triGramWordMap, inputWithTag);

        String depString = this.getDEPString(tree);
        this.addNGramFeats(builder, "biGramDEP_", this.biGramDepMap, depString);
        this.addNGramFeats(builder, "triGramDEP_", this.triGramDepMap, depString);

        DNode firstNode = tree.get(1);
        String firstPOS = firstNode.getPOS();
        this.addFeats(builder, "first_word_pos", WH_POS_TAGS.contains(firstPOS), 1);

        // When the sentence ends in punctuation the last tree node is that punctuation mark,
        // so step back one node to reach the last word.
        int lastPOSIndex = sentence.matches(".*\\p{Punct}$") ? tree.size() - 2 : tree.size() - 1;
        String lastPOS = tree.get(lastPOSIndex).getPOS();
        this.addFeats(builder, "last_word_pos", WH_POS_TAGS.contains(lastPOS), 1);

        this.addFeats(builder, "first_word_root_verb", firstPOS.startsWith("VB") && firstNode.isRoot(), weight);

        boolean hasAux = tree.stream().anyMatch(node -> "aux".equals(node.getDepLabel()));
        this.addFeats(builder, "has_aux", hasAux, 1);

        boolean isStartPrefixMatch = QUESTION_PREFIXES.stream().anyMatch(lowered::startsWith);
        this.addFeats(builder, "question_over_head", isStartPrefixMatch, 1);

        boolean isImperativeStart = IMPERATIVE_LEMMAS.contains(firstNode.getLemma()) && firstNode.isRoot();
        this.addFeats(builder, "has_imperative_keyword", isImperativeStart, weight);

        // Only the punctuation feature that fires produces output; a non-firing feature emits
        // nothing, so there is no need to "add" the others as false.
        if (!sentence.isEmpty()) {
            switch (sentence.charAt(sentence.length() - 1)) {
                case '.':
                case ';':
                    this.addFeats(builder, "punct_dot", true, 1);
                    break;
                case '!':
                    this.addFeats(builder, "punct_exclaim", true, weight);
                    break;
                case '?':
                    this.addFeats(builder, "punct_question", true, weight);
                    break;
                default:
                    break;
            }
        }

        this.addFeats(builder, "whether", lowered.contains("whether"), 1);
        return builder.toString().trim();
    }

    /**
     * Joins sentence, generated features and label into one trimmed output line.
     *
     * @param tokens exactly two elements: sentence and label
     * @param tree   parse of the sentence
     */
    private String assemble(String[] tokens, DTree tree) {
        String feats = this.generateFeats(tokens[0], tree);
        return (tokens[0] + this.delimiter + feats + this.delimiter + tokens[1]).trim();
    }

    /**
     * Parses the sentence part of {@code entry} with the configured parser and returns the
     * feature line for it.
     *
     * @param entry {@code sentence<delimiter>label}
     * @return {@code sentence<delimiter>features<delimiter>label}, or an empty string when the
     *         entry does not split into exactly two fields (the parser is not invoked then)
     */
    public String getFeats(String entry) {
        String[] tokens = entry.split(this.delimiter);
        // Validate before parsing: a delimiter-only entry splits to an empty array, and there
        // is no point running the parser on an entry that will be rejected anyway.
        if (tokens.length != 2) {
            return "";
        }
        DTree tree = this.parser.parse(tokens[0].replaceAll("_", " "));
        return this.assemble(tokens, tree);
    }

    /**
     * Returns the feature line for {@code entry} using an already parsed tree.
     *
     * @param entry {@code sentence<delimiter>label}
     * @param tree  parse of the sentence part of {@code entry}
     * @return {@code sentence<delimiter>features<delimiter>label}, or an empty string when the
     *         entry does not split into exactly two fields
     */
    public String getFeats(String entry, DTree tree) {
        String[] tokens = entry.split(this.delimiter);
        if (tokens.length != 2) {
            return "";
        }
        return this.assemble(tokens, tree);
    }

    /**
     * Loads a serialized n-gram map from {@code filePath}.
     *
     * <p>Best effort: a missing file, an unreadable file or an unexpected payload is logged and
     * yields an empty map rather than an exception.
     *
     * <p>SECURITY: this uses Java native deserialization. Only point it at trusted model files.
     *
     * @param filePath location of the serialized map
     * @return the deserialized map, or a new empty map on any failure
     */
    private Map<String, Integer> deserialize(String filePath) {
        File serializedFile = new File(filePath);
        if (!serializedFile.exists() || serializedFile.isDirectory()) {
            LOG.warn("N-gram file not found, using an empty map: {}", filePath);
            return new HashMap<>();
        }
        // Both streams are declared as resources so the file handle is released even when the
        // ObjectInputStream constructor or readObject fails.
        try (FileInputStream fis = new FileInputStream(serializedFile);
             ObjectInputStream ois = new ObjectInputStream(fis)) {
            Object payload = ois.readObject();
            if (payload instanceof Map) {
                @SuppressWarnings("unchecked") // element types cannot be verified after erasure
                Map<String, Integer> loaded = (Map<String, Integer>) payload;
                return loaded;
            }
            LOG.error("Unexpected content in {}: expected a Map but found {}", filePath,
                    payload == null ? "null" : payload.getClass().getName());
        } catch (IOException | ClassNotFoundException e) {
            LOG.error("Failed to deserialize n-gram map from {}", filePath, e);
        }
        return new HashMap<>();
    }

    /**
     * Creates an extractor backed by a {@link StanfordPCFGParser} and loads the four n-gram maps
     * ({@code bigram_word}, {@code trigram_word}, {@code bigram_dep}, {@code trigram_dep}) from
     * {@code filepathPrefix}.
     *
     * @param filepathPrefix directory containing the serialized n-gram maps
     * @param delimiter      separator between sentence, features and label; also used as the
     *                       split pattern for incoming entries
     */
    public FeatureExtractor(String filepathPrefix, String delimiter) {
        this.delimiter = delimiter;
        this.parser = new StanfordPCFGParser();
        this.filepathPrefix = filepathPrefix;
        this.biGramWordMap = this.deserialize(filepathPrefix + "/bigram_word");
        this.triGramWordMap = this.deserialize(filepathPrefix + "/trigram_word");
        this.biGramDepMap = this.deserialize(filepathPrefix + "/bigram_dep");
        this.triGramDepMap = this.deserialize(filepathPrefix + "/trigram_dep");
    }
}

