/*
 * Decompiled with CFR 0.152.
 */
package eus.ixa.ixa.pipe.nerc.features;

import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import opennlp.tools.util.featuregen.FeatureGeneratorAdapter;
import opennlp.tools.util.featuregen.StringPattern;

/**
 * Feature generator that emits a coarse "shape" class for the current token
 * and, optionally, the lowercased token paired with that class.
 *
 * <p>Emitted features:
 * <ul>
 *   <li>{@code wc=<class>} - always;</li>
 *   <li>{@code w&c=<lowercased token>,<class>} - only when the generator was
 *       constructed with {@code generateWordAndClassFeature == true}.</li>
 * </ul>
 *
 * <p>The possible class codes are listed on {@link #tokenShapeFeature(String)}.
 */
public class TokenClassFeatureGenerator
extends FeatureGeneratorAdapter {
    /** Matches a token made of one ASCII capital letter followed by a period, e.g. {@code "A."}. */
    private static final Pattern CAP_PERIOD = Pattern.compile("^[A-Z]\\.$");

    /** Whether the combined {@code w&c=} feature is emitted in addition to {@code wc=}. */
    private final boolean generateWordAndClassFeature;

    /**
     * Creates a generator that emits both the {@code wc=} and the {@code w&c=} features.
     */
    public TokenClassFeatureGenerator() {
        this(true);
    }

    /**
     * Creates a generator.
     *
     * @param generateWordAndClassFeature {@code true} to also emit the combined
     *        {@code w&c=} feature; {@code false} to emit only {@code wc=}
     */
    public TokenClassFeatureGenerator(boolean generateWordAndClassFeature) {
        this.generateWordAndClassFeature = generateWordAndClassFeature;
    }

    /**
     * Appends the token-class features for {@code tokens[index]} to {@code features}.
     *
     * @param features list the generated feature strings are appended to
     * @param tokens   tokens of the current sequence
     * @param index    position in {@code tokens} of the token to describe
     * @param preds    not used by this generator
     */
    @Override
    public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
        String token = tokens[index];
        String wordClass = TokenClassFeatureGenerator.tokenShapeFeature(token);
        features.add("wc=" + wordClass);
        if (this.generateWordAndClassFeature) {
            // Locale.ROOT keeps the feature string identical on every JVM; the
            // default-locale overload would, for example, map "I" to a dotless
            // "i" under a Turkish locale and silently change the feature.
            features.add("w&c=" + token.toLowerCase(Locale.ROOT) + "," + wordClass);
        }
    }

    /**
     * Maps a token to a short code describing its shape, as reported by
     * {@link StringPattern#recognize(String)}. The checks are applied in the
     * order listed and the first one that holds wins:
     *
     * <ol>
     *   <li>{@code lc}  - all lowercase letters;</li>
     *   <li>{@code 2d}  - digit count of 2;</li>
     *   <li>{@code 4d}  - digit count of 4;</li>
     *   <li>contains a digit: {@code an} (also contains letters), {@code dd}
     *       (hyphen), {@code ds} (slash), {@code dc} (comma), {@code dp}
     *       (period), otherwise {@code num};</li>
     *   <li>{@code sc}  - all capital letters and the token has length 1;</li>
     *   <li>{@code ac}  - all capital letters;</li>
     *   <li>{@code cp}  - one ASCII capital letter followed by a period;</li>
     *   <li>{@code ic}  - initial capital letter;</li>
     *   <li>{@code other} - none of the above.</li>
     * </ol>
     *
     * @param token the token to classify
     * @return the shape code for {@code token}
     */
    public static String tokenShapeFeature(String token) {
        StringPattern pattern = StringPattern.recognize(token);
        if (pattern.isAllLowerCaseLetter()) {
            return "lc";
        }
        // The digit-count checks deliberately precede the generic containsDigit
        // branch, so a token with a digit count of 2 or 4 never reaches it.
        if (pattern.digits() == 2) {
            return "2d";
        }
        if (pattern.digits() == 4) {
            return "4d";
        }
        if (pattern.containsDigit()) {
            return digitShape(pattern);
        }
        if (pattern.isAllCapitalLetter()) {
            return token.length() == 1 ? "sc" : "ac";
        }
        if (CAP_PERIOD.matcher(token).find()) {
            return "cp";
        }
        if (pattern.isInitialCapitalLetter()) {
            return "ic";
        }
        return "other";
    }

    /** Returns the shape code for a token already known to contain at least one digit. */
    private static String digitShape(StringPattern pattern) {
        if (pattern.containsLetters()) {
            return "an";
        }
        if (pattern.containsHyphen()) {
            return "dd";
        }
        if (pattern.containsSlash()) {
            return "ds";
        }
        if (pattern.containsComma()) {
            return "dc";
        }
        if (pattern.containsPeriod()) {
            return "dp";
        }
        return "num";
    }
}

