/*
 * Decompiled with CFR 0.152.
 */
package org.maochen.nlp.app.chunker;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.maochen.nlp.ml.SequenceTuple;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.vector.IVector;
import org.maochen.nlp.ml.vector.LabeledVector;

/**
 * Builds string features of the form {@code key=value} for each position of a tagged token
 * sequence.
 *
 * <p>For every position the extractor looks at a window of up to two tokens on either side and
 * emits the word and POS tag at each offset, a fixed set of word/POS n-gram combinations, and
 * prefixes of the Brown cluster id of each word in the window. The cluster table is read once,
 * when the class is initialised, from the classpath resource {@code /brown.rcv1.3200.txt}.
 */
public class ChunkerFeatureExtractor {
    /** Position of the word inside the {@code featsName} array read by {@link #extractFeat}. */
    public static final int WORD_INDEX = 0;
    /** Position of the POS tag inside the {@code featsName} array read by {@link #extractFeat}. */
    public static final int POS_INDEX = 1;

    /** Classpath location of the Brown cluster table. */
    private static final String BROWN_CLUSTER_RESOURCE = "/brown.rcv1.3200.txt";
    /** Number of tokens inspected on each side of the current position. */
    private static final int CONTEXT_WINDOW = 2;
    /** Lengths of the cluster-id prefixes that are turned into features. */
    private static final int[] BROWN_PREFIX = new int[]{4, 6, 10, 20};
    /** Maps a word to its Brown cluster id; empty when the table could not be loaded. */
    private static final Map<String, String> BROWN_CLUSTER = loadBrownClusters(BROWN_CLUSTER_RESOURCE);

    /**
     * Reads the Brown cluster table from the classpath.
     *
     * <p>Each line is split on single whitespace characters; the first field is stored as the
     * cluster id and the second field as the word. Loading is best effort: a missing resource
     * or a read error is reported on {@code System.err} and whatever was read so far is
     * returned, and lines with fewer than two fields are skipped, so that class initialisation
     * never fails because of the table.
     *
     * @param resource absolute classpath name of the table
     * @return a map from word to cluster id, possibly empty
     */
    private static Map<String, String> loadBrownClusters(String resource) {
        Map<String, String> clusters = new HashMap<>();
        InputStream in = ChunkerFeatureExtractor.class.getResourceAsStream(resource);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; wrapping null in a reader
            // would abort class initialisation with an opaque NullPointerException.
            System.err.println("Brown cluster resource not found on classpath: " + resource
                    + " (Brown cluster features are disabled)");
            return clusters;
        }
        // Compiled once instead of once per line, as String.split would do for this regex.
        Pattern whitespace = Pattern.compile("\\s");
        try (BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                String[] fields = whitespace.split(line);
                if (fields.length < 2) {
                    // Blank or malformed line: nothing to index.
                    continue;
                }
                clusters.put(fields[1], fields[0]);
            }
        } catch (IOException e) {
            System.err.println("Failed to read Brown cluster resource: " + resource);
            e.printStackTrace();
        }
        return clusters;
    }

    /**
     * Appends one {@code key=value} feature to {@code feat}.
     *
     * @param feat list that receives the feature
     * @param key  feature name placed before the {@code =}
     * @param val  value parts, joined with {@code _} to form the feature value
     */
    private static void addFeat(List<String> feat, String key, String... val) {
        feat.add(key + "=" + String.join("_", val));
    }

    /**
     * Produces the Brown cluster prefix features of a single word.
     *
     * @param word word to look up in the cluster table
     * @return a map from {@code brown_<prefixLength>} to the cluster id truncated to at most
     *         that length, or an empty map when the word is not in the table
     */
    private static Map<String, String> extractBrownFeat(String word) {
        String clusterId = BROWN_CLUSTER.get(word);
        if (clusterId == null) {
            return Collections.emptyMap();
        }
        Map<String, String> prefixes = new HashMap<>();
        for (int prefixLength : BROWN_PREFIX) {
            int end = Math.min(prefixLength, clusterId.length());
            prefixes.put("brown_" + prefixLength, clusterId.substring(0, end));
        }
        return prefixes;
    }

    /**
     * Extracts the features of the token at position {@code i}.
     *
     * <p>Offsets that fall outside the sequence are skipped, together with the combination
     * features that depend on them. Word/POS features for the whole window are emitted first,
     * followed by the Brown cluster features for the whole window.
     *
     * @param i      position of the current token
     * @param tokens words of the sequence
     * @param pos    POS tags, indexed with the same positions as {@code tokens}
     * @return the features of position {@code i} as {@code key=value} strings
     */
    public static List<String> extractFeatSingle(int i, String[] tokens, String[] pos) {
        List<String> currentFeats = new ArrayList<>();
        int start = Math.max(0, i - CONTEXT_WINDOW);
        int end = Math.min(i + CONTEXT_WINDOW + 1, tokens.length);

        for (int index = start; index < end; ++index) {
            int offset = index - i;
            addFeat(currentFeats, "w" + offset, tokens[index]);
            addFeat(currentFeats, "pos" + offset, pos[index]);

            // Combination features are attached to the outermost offset they use, so each is
            // emitted only when every position it reads lies inside the sequence.
            switch (offset) {
                case -2:
                    addFeat(currentFeats, "pos-2-1", pos[i - 2], pos[i - 1]);
                    addFeat(currentFeats, "pos-2-10", pos[i - 2], pos[i - 1], pos[i]);
                    break;
                case -1:
                    addFeat(currentFeats, "w-10", tokens[i - 1], tokens[i]);
                    addFeat(currentFeats, "pos-10", pos[i - 1], pos[i]);
                    if (i < tokens.length - 1) {
                        // Also needs the following token, which the window bound does not
                        // guarantee at this offset.
                        addFeat(currentFeats, "pos-10+1", pos[i - 1], pos[i], pos[i + 1]);
                    }
                    break;
                case 1:
                    addFeat(currentFeats, "w0+1", tokens[i], tokens[i + 1]);
                    addFeat(currentFeats, "pos0+1", pos[i], pos[i + 1]);
                    break;
                case 2:
                    addFeat(currentFeats, "pos+1+2", pos[i + 1], pos[i + 2]);
                    addFeat(currentFeats, "pos0+1+2", pos[i], pos[i + 1], pos[i + 2]);
                    break;
                default:
                    break;
            }
        }

        // Kept as a second pass so the Brown features stay after all word/POS features.
        for (int index = start; index < end; ++index) {
            int offset = index - i;
            for (Map.Entry<String, String> brown : extractBrownFeat(tokens[index]).entrySet()) {
                addFeat(currentFeats, brown.getKey() + "_" + offset, brown.getValue());
            }
        }
        return currentFeats;
    }

    /**
     * Extracts features for every position of a sequence.
     *
     * <p>The word and POS tag of each input tuple are read from its vector's {@code featsName}
     * array at {@link #WORD_INDEX} and {@link #POS_INDEX}. Each returned tuple wraps a new
     * {@link LabeledVector} holding the features of one position and carries the label of the
     * input tuple at the same position.
     *
     * @param entry sequence whose tuples hold {@link LabeledVector} vectors
     * @return one tuple per input position, in input order
     */
    public static List<Tuple> extractFeat(SequenceTuple entry) {
        int length = entry.entries.size();
        String[] tokens = new String[length];
        String[] pos = new String[length];
        for (int k = 0; k < length; ++k) {
            String[] names = ((LabeledVector) ((Tuple) entry.entries.get(k)).vector).featsName;
            tokens[k] = names[WORD_INDEX];
            pos[k] = names[POS_INDEX];
        }

        List<Tuple> tuples = new ArrayList<>(length);
        for (int k = 0; k < length; ++k) {
            List<String> singleTokenFeat = extractFeatSingle(k, tokens, pos);
            LabeledVector v = new LabeledVector(singleTokenFeat.toArray(new String[0]));
            Tuple t = new Tuple((IVector) v);
            t.label = ((Tuple) entry.entries.get(k)).label;
            tuples.add(t);
        }
        return tuples;
    }
}

