/*
 * Decompiled with CFR 0.152.
 */
package org.maochen.nlp.app.ner;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.maochen.nlp.app.featextractor.BrownFeatExtractor;
import org.maochen.nlp.app.featextractor.IFeatureExtractor;
import org.maochen.nlp.ml.SequenceTuple;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.vector.FeatNamedVector;
import org.maochen.nlp.ml.vector.IVector;

/**
 * Extracts string features for each token of a sequence, for use by a
 * named-entity tagger.
 *
 * <p>For every token position the extractor looks at a window of up to two
 * tokens on either side and emits lexical, shape and digit/punctuation
 * features through {@link IFeatureExtractor#addFeat}, followed by whatever
 * {@code BrownFeatExtractor.extractBrownFeat} returns for the same window.
 */
public class NERFeatureExtractor implements IFeatureExtractor {
    // Compiled once: these were previously recompiled for every token of every window.
    private static final Pattern DIGIT = Pattern.compile("\\d+");
    private static final Pattern TWO_DIGITS = Pattern.compile("\\d{2}");
    private static final Pattern FOUR_DIGITS = Pattern.compile("\\d{4}");
    // NOTE(review): inside a character class '|' is a literal, so this also matches
    // tokens containing '|' (besides '%', ',', '.', '/' and '-'). Left unchanged on
    // purpose because altering it would change the emitted features.
    private static final Pattern SPECIAL_CHAR = Pattern.compile("[%|,|.|/|-]");

    /**
     * Maps a token to its capitalisation shape: one {@code X} per upper-case
     * character and one {@code x} for every other character.
     *
     * @param str the token to describe
     * @return a string of the same length as {@code str} made of {@code X} and {@code x}
     */
    private static String getWordShape(String str) {
        StringBuilder shape = new StringBuilder(str.length());
        for (int pos = 0; pos < str.length(); ++pos) {
            shape.append(Character.isUpperCase(str.charAt(pos)) ? 'X' : 'x');
        }
        return shape.toString();
    }

    /**
     * Builds the feature list for the token at position {@code i}.
     *
     * <p>Every token in the window {@code [i - 2, i + 2]} (clipped to the array
     * bounds) contributes a positional word feature {@code w<offset>} plus
     * {@code word_length}, {@code word_shape} and the {@code contains_*} flags.
     * Note that only the {@code w<offset>} feature name carries the offset; the
     * other feature names are the same for every token of the window. The
     * immediate neighbours additionally contribute the bigram features
     * {@code w-10} and {@code w0+1}.
     *
     * @param i index of the token being described; expected to be a valid index into {@code tokens}
     * @param tokens the tokens of the whole sequence
     * @return the features collected for position {@code i}, ending with the
     *         features returned by {@code BrownFeatExtractor.extractBrownFeat(i, -2, 2, tokens)}
     */
    public List<String> extractFeatSingle(int i, String[] tokens) {
        List<String> currentFeats = new ArrayList<String>();
        int windowStart = Math.max(0, i - 2);
        int windowEnd = Math.min(i + 3, tokens.length);
        for (int index = windowStart; index < windowEnd; ++index) {
            String token = tokens[index];
            IFeatureExtractor.addFeat(currentFeats, "w" + (index - i), token);
            IFeatureExtractor.addFeat(currentFeats, "word_length", String.valueOf(token.length()));
            IFeatureExtractor.addFeat(currentFeats, "word_shape", NERFeatureExtractor.getWordShape(token));

            boolean containsDigit = DIGIT.matcher(token).find();
            boolean containsTwoDigit = TWO_DIGITS.matcher(token).find();
            boolean containsFourDigit = FOUR_DIGITS.matcher(token).find();
            boolean containsChar = SPECIAL_CHAR.matcher(token).find();

            // Flag features carry no value, hence the explicit empty value array.
            if (containsChar) {
                IFeatureExtractor.addFeat(currentFeats, "contains_char", new String[0]);
            }
            if (containsDigit) {
                IFeatureExtractor.addFeat(currentFeats, "contains_digit", new String[0]);
            }
            if (containsTwoDigit) {
                IFeatureExtractor.addFeat(currentFeats, "contains_two_digit", new String[0]);
            }
            if (containsFourDigit) {
                IFeatureExtractor.addFeat(currentFeats, "contains_four_digit", new String[0]);
            }
            if (containsChar && containsDigit) {
                IFeatureExtractor.addFeat(currentFeats, "contains_digit_char", new String[0]);
            }

            // Bigram features pairing the current token with its direct neighbours.
            if (index == i - 1) {
                IFeatureExtractor.addFeat(currentFeats, "w-10", tokens[i - 1], tokens[i]);
            } else if (index == i + 1) {
                IFeatureExtractor.addFeat(currentFeats, "w0+1", tokens[i], tokens[i + 1]);
            }
        }
        currentFeats.addAll(BrownFeatExtractor.extractBrownFeat(i, -2, 2, tokens));
        return currentFeats;
    }

    /**
     * Produces one feature tuple per entry of the given sequence.
     *
     * <p>The token text of each entry is read from the first feature name of
     * its {@link FeatNamedVector}; the label of each input entry is copied to
     * the corresponding output tuple.
     *
     * @param entry the sequence whose entries are to be described
     * @return a new list with one {@link Tuple} per entry, in the same order
     */
    @Override
    public List<Tuple> extractFeat(SequenceTuple entry) {
        String[] tokens = entry.entries.stream()
                .map(tuple -> ((FeatNamedVector) tuple.vector).featsName[0])
                .toArray(String[]::new);

        List<Tuple> tuples = new ArrayList<Tuple>(tokens.length);
        for (int i = 0; i < tokens.length; ++i) {
            List<String> singleTokenFeat = this.extractFeatSingle(i, tokens);
            // Declared as IVector so the same Tuple constructor is selected as before.
            IVector v = new FeatNamedVector(singleTokenFeat.toArray(new String[0]));
            Tuple t = new Tuple(v);
            t.label = entry.entries.get(i).label;
            tuples.add(t);
        }
        return tuples;
    }
}

