/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jnet.tagger;

import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.tsf.LexiconMembership;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.pipe.tsf.RegexMatches;
import cc.mallet.pipe.tsf.TokenTextCharNGrams;
import cc.mallet.pipe.tsf.TokenTextCharPrefix;
import cc.mallet.pipe.tsf.TokenTextCharSuffix;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import de.julielab.jnet.tagger.BasePipe;
import de.julielab.jnet.tagger.FeatureConfiguration;
import de.julielab.jnet.tagger.Sentence;
import de.julielab.jnet.tagger.SentencePipeIterator;
import de.julielab.jnet.tagger.TokenNGramPipe;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Properties;
import java.util.regex.Pattern;

/**
 * Assembles the MALLET pipe chain that converts {@link Sentence} objects into
 * feature-vector sequences, and flattens sequence instances into one instance
 * per token for use with a classifier.
 */
class FeatureGenerator {
    private static final String GREEK = "(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";
    private static final String UNICODE_UPPER = "\\p{Lu}";
    private static final String UNICODE_LOWER = "\\p{Ll}";

    FeatureGenerator() {
    }

    /**
     * Builds the feature pipe chain described by {@code featureConfig} and
     * runs all {@code sentences} through it.
     *
     * <p>The pipes are added in a fixed order: base pipe, surface-form regular
     * expressions, optional biomedical regular expressions, prefixes and
     * suffixes, lexicon membership, offset conjunctions, token n-grams,
     * character n-grams, and finally the conversion to feature vector
     * sequences. The order is significant because each pipe sees the output of
     * the ones before it.
     *
     * @param sentences     the sentences to convert
     * @param featureConfig the feature configuration; read through
     *                      {@link FeatureConfiguration} and directly via
     *                      {@link Properties#getProperty(String)}
     * @return an instance list backed by the assembled pipe, containing the
     *         piped sentences
     */
    public InstanceList createFeatureData(ArrayList<Sentence> sentences, Properties featureConfig) {
        FeatureConfiguration fc = new FeatureConfiguration();
        ArrayList<Pipe> pipeParam = new ArrayList<Pipe>();

        pipeParam.add(new BasePipe(featureConfig));
        addSurfaceFormFeatures(pipeParam);
        if (fc.featureActive(featureConfig, "feat_bioregexp_enabled")) {
            addBioRegexFeatures(pipeParam);
        }
        addAffixFeatures(pipeParam, fc, featureConfig);
        addLexiconFeatures(pipeParam, fc, featureConfig);
        addContextFeatures(pipeParam, fc, featureConfig);
        pipeParam.add(new TokenSequence2FeatureVectorSequence(true, true));

        SerialPipes myPipe = new SerialPipes(pipeParam.toArray(new Pipe[pipeParam.size()]));
        InstanceList data = new InstanceList(myPipe);
        data.addThruPipe(new SentencePipeIterator(sentences));
        return data;
    }

    /** Adds the always-on regex features for capitalisation, digits, dashes and punctuation. */
    private static void addSurfaceFormFeatures(ArrayList<Pipe> pipes) {
        pipes.add(new RegexMatches("INITLOWCAPS_ANYTHING_NONUMBER", Pattern.compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "][^0-9]*")));
        pipes.add(new RegexMatches("INITLOWCAPS_ANYTHING_WITHNUMBER", Pattern.compile("[" + UNICODE_LOWER + "][" + UNICODE_UPPER + "].*[0-9].*")));
        pipes.add(new RegexMatches("INITCAPS", Pattern.compile("[" + UNICODE_UPPER + "].*")));
        pipes.add(new RegexMatches("INITCAPSALPHA", Pattern.compile("[" + UNICODE_UPPER + "][" + UNICODE_LOWER + "].*")));
        pipes.add(new RegexMatches("ALLCAPS", Pattern.compile("[" + UNICODE_UPPER + "]+")));
        pipes.add(new RegexMatches("CAPSMIX", Pattern.compile("[" + UNICODE_UPPER + UNICODE_LOWER + "]+")));
        pipes.add(new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*")));
        pipes.add(new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]")));
        pipes.add(new RegexMatches("DOUBLEDIGIT", Pattern.compile("[0-9][0-9]")));
        pipes.add(new RegexMatches("NATURALNUMBER", Pattern.compile("[0-9]+")));
        pipes.add(new RegexMatches("REALNUMBER", Pattern.compile("[-0-9]+[.,]+[0-9.,]+")));
        pipes.add(new RegexMatches("HASDASH", Pattern.compile(".*-.*")));
        pipes.add(new RegexMatches("INITDASH", Pattern.compile("-.*")));
        pipes.add(new RegexMatches("ENDDASH", Pattern.compile(".*-")));
        // ALPHANUMERIC is registered twice on purpose: once for letter-before-digit
        // and once for digit-before-letter; both set the same feature name.
        pipes.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[" + UNICODE_UPPER + UNICODE_LOWER + "].*[0-9].*")));
        pipes.add(new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[0-9].*[" + UNICODE_UPPER + UNICODE_LOWER + "].*")));
        pipes.add(new RegexMatches("IS_PUNCTUATION_MARK", Pattern.compile("[,.;:?!]")));
        pipes.add(new RegexMatches("IS_MINUSDASHSLASH", Pattern.compile("[-_/]")));
    }

    /** Adds the Roman-numeral and Greek-letter regex features (enabled via "feat_bioregexp_enabled"). */
    private static void addBioRegexFeatures(ArrayList<Pipe> pipes) {
        pipes.add(new RegexMatches("ROMAN", Pattern.compile("[IVXDLCM]+")));
        pipes.add(new RegexMatches("HASROMAN", Pattern.compile(".*\\b[IVXDLCM]+\\b.*")));
        pipes.add(new RegexMatches("GREEK", Pattern.compile(GREEK)));
        pipes.add(new RegexMatches("HASGREEK", Pattern.compile(".*\\b" + GREEK + "\\b.*")));
    }

    /** Adds one prefix pipe per configured "prefix_sizes" entry, then one suffix pipe per "suffix_sizes" entry. */
    private static void addAffixFeatures(ArrayList<Pipe> pipes, FeatureConfiguration fc, Properties featureConfig) {
        int[] prefixSizes = fc.getIntArray(featureConfig, "prefix_sizes");
        if (prefixSizes != null) {
            for (int prefixSize : prefixSizes) {
                pipes.add(new TokenTextCharPrefix("PREFIX=", prefixSize));
            }
        }
        int[] suffixSizes = fc.getIntArray(featureConfig, "suffix_sizes");
        if (suffixSizes != null) {
            for (int suffixSize : suffixSizes) {
                pipes.add(new TokenTextCharSuffix("SUFFIX=", suffixSize));
            }
        }
    }

    /** Adds a lexicon-membership pipe for every lexicon key; a lexicon whose file is missing is reported and skipped. */
    private static void addLexiconFeatures(ArrayList<Pipe> pipes, FeatureConfiguration fc, Properties featureConfig) {
        for (String key : fc.getLexiconKeys(featureConfig)) {
            File lexFile = new File(featureConfig.getProperty(key));
            try {
                pipes.add(new LexiconMembership(key + "_membership", lexFile, true));
            }
            catch (FileNotFoundException e) {
                // Best effort: feature generation continues without this lexicon,
                // but say which one is missing so the problem can be diagnosed.
                System.err.println("Lexicon file for key '" + key + "' not found: " + lexFile);
                e.printStackTrace();
            }
        }
    }

    /** Adds offset conjunctions, token n-grams and character n-grams, each only when configured. */
    private static void addContextFeatures(ArrayList<Pipe> pipes, FeatureConfiguration fc, Properties featureConfig) {
        int[][] offset = fc.offsetConjFromConfig(featureConfig.getProperty("offset_conjunctions"));
        if (offset != null) {
            pipes.add(new OffsetConjunctions(offset));
        }
        int[] tokenNGrams = fc.getIntArray(featureConfig, "token_ngrams");
        if (tokenNGrams != null) {
            pipes.add(new TokenNGramPipe(tokenNGrams));
        }
        int[] charNGrams = fc.getIntArray(featureConfig, "char_ngrams");
        if (charNGrams != null) {
            pipes.add(new TokenTextCharNGrams("CHAR_NGRAM=", charNGrams));
        }
    }

    /**
     * Flattens every sequence instance in {@code orgList} into one instance
     * per token and collects them in a new list.
     *
     * @param METrainerDummyPipe2 the pipe the returned list is created with
     * @param orgList             instances whose data is a
     *                            {@link FeatureVectorSequence} and whose target
     *                            is a {@link LabelSequence}
     * @return a new instance list holding one instance per token, in the
     *         original order
     */
    public static InstanceList convertFeatsforClassifier(Pipe METrainerDummyPipe2, InstanceList orgList) {
        InstanceList iList = new InstanceList(METrainerDummyPipe2);
        for (int i = 0; i < orgList.size(); ++i) {
            appendTokenInstances(iList, (Instance)orgList.get(i));
        }
        return iList;
    }

    /**
     * Flattens a single sequence instance into one instance per token.
     *
     * @param METrainerDummyPipe2 the pipe the returned list is created with
     * @param inst                an instance whose data is a
     *                            {@link FeatureVectorSequence} and whose target
     *                            is a {@link LabelSequence}
     * @return a new instance list holding one instance per token of
     *         {@code inst}
     */
    public static InstanceList convertFeatsforClassifier(Pipe METrainerDummyPipe2, Instance inst) {
        InstanceList iList = new InstanceList(METrainerDummyPipe2);
        appendTokenInstances(iList, inst);
        return iList;
    }

    /**
     * Appends one instance per token of {@code inst} to {@code target}; each
     * new instance carries the token's feature vector, its label, and the
     * name and source of {@code inst}.
     */
    private static void appendTokenInstances(InstanceList target, Instance inst) {
        FeatureVectorSequence fvs = (FeatureVectorSequence)inst.getData();
        LabelSequence ls = (LabelSequence)inst.getTarget();
        LabelAlphabet ldict = (LabelAlphabet)ls.getAlphabet();
        Object source = inst.getSource();
        Object name = inst.getName();
        if (ls.size() != fvs.size()) {
            // NOTE(review): this terminates the whole JVM from utility code. Kept
            // because callers may rely on it; consider throwing
            // IllegalStateException instead once callers have been checked.
            System.err.println("failed making token instances: size of labelsequence != size of featue vector sequence: " + ls.size() + " - " + fvs.size());
            System.exit(-1);
        }
        for (int j = 0; j < fvs.size(); ++j) {
            target.add(new Instance(fvs.getFeatureVector(j), ldict.lookupLabel(ls.get(j)), name, source));
        }
    }
}

