/*
 * Decompiled with CFR 0.152.
 */
package sklearn.feature_extraction.text;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import numpy.DType;
import numpy.core.Scalar;
import org.dmg.pmml.Apply;
import org.dmg.pmml.Constant;
import org.dmg.pmml.DataType;
import org.dmg.pmml.DefineFunction;
import org.dmg.pmml.DerivedField;
import org.dmg.pmml.Expression;
import org.dmg.pmml.FieldName;
import org.dmg.pmml.FieldRef;
import org.dmg.pmml.OpType;
import org.dmg.pmml.ParameterField;
import org.dmg.pmml.TextIndex;
import org.dmg.pmml.TypeDefinitionField;
import org.jpmml.converter.ContinuousFeature;
import org.jpmml.converter.Feature;
import org.jpmml.converter.PMMLEncoder;
import org.jpmml.converter.PMMLUtil;
import org.jpmml.converter.ValueUtil;
import org.jpmml.sklearn.ClassDictUtil;
import org.jpmml.sklearn.SkLearnEncoder;
import sklearn.HasNumberOfFeatures;
import sklearn.Transformer;
import sklearn2pmml.feature_extraction.text.Splitter;

/**
 * Converter for a fitted {@code CountVectorizer} object.
 *
 * <p>
 * Consumes a single string feature (the document) and produces one
 * term frequency feature per entry of the {@code vocabulary_} attribute.
 * Each term frequency is expressed as an application of a shared
 * <code>tf(document, term)</code> user-defined function, whose body is a
 * {@link TextIndex} element.
 * </p>
 */
public class CountVectorizer
extends Transformer
implements HasNumberOfFeatures {
    public CountVectorizer(String module, String name) {
        super(module, name);
    }

    /**
     * @return The number of input features; always one (the document).
     */
    @Override
    public int getNumberOfFeatures() {
        return 1;
    }

    @Override
    public OpType getOpType() {
        return OpType.CATEGORICAL;
    }

    @Override
    public DataType getDataType() {
        return DataType.STRING;
    }

    /**
     * Encodes the vocabulary as a list of term frequency features.
     *
     * @param ids The identifiers of the input features. Must contain exactly one element.
     * This list is cleared, and then re-populated with the names of the produced features.
     * @param features The input features. Must contain exactly one element.
     * @param encoder The encoder that receives the derived fields and the define function.
     *
     * @return One feature per vocabulary term, ordered by term index.
     * The backing derived field of a feature is created on demand,
     * when its {@code toContinuousFeature()} method is invoked.
     *
     * @throws IllegalArgumentException If the vocabulary indices do not cover
     * the range from zero (inclusive) to the size of the vocabulary (exclusive).
     */
    @Override
    public List<Feature> encodeFeatures(List<String> ids, List<Feature> features, SkLearnEncoder encoder) {
        Boolean lowercase = this.getLowercase();
        Map<String, Scalar> vocabulary = this.getVocabulary();

        ClassDictUtil.checkSize(1, ids, features);

        Feature feature = features.get(0);

        // A bidirectional map, because the vocabulary is keyed by term, but the features must be emitted in index order
        BiMap<String, Integer> termIndexMap = HashBiMap.create(vocabulary.size());

        for (Map.Entry<String, Scalar> entry : vocabulary.entrySet()) {
            Number index = (Number)entry.getValue().getOnlyElement();

            termIndexMap.put(entry.getKey(), ValueUtil.asInt(index));
        }

        BiMap<Integer, String> indexTermMap = termIndexMap.inverse();

        DType dType = this.getDType();

        if (lowercase.booleanValue()) {
            FieldName lowercaseName = FieldName.create("lowercase(" + feature.getName().getValue() + ")");

            DerivedField derivedField = encoder.createDerivedField(lowercaseName, OpType.CATEGORICAL, DataType.STRING, PMMLUtil.createApply("lowercase", feature.ref()));

            // The lowercased document is a string; it has no continuous representation
            feature = new Feature(encoder, derivedField.getName(), derivedField.getDataType()){

                @Override
                public ContinuousFeature toContinuousFeature() {
                    throw new UnsupportedOperationException();
                }
            };
        }

        DefineFunction defineFunction = this.encodeDefineFunction();

        encoder.addDefineFunction(defineFunction);

        // Loop invariant
        DataType dataType = (dType != null ? dType.getDataType() : DataType.DOUBLE);

        ids.clear();

        List<Feature> result = new ArrayList<>();

        int max = indexTermMap.size();
        for (int i = 0; i < max; ++i) {
            String term = indexTermMap.get(i);

            // A gap in the index range would otherwise yield a feature named "tf(null)"
            if (term == null) {
                throw new IllegalArgumentException("The vocabulary does not contain a term for index " + i);
            }

            final Apply apply = this.encodeApply(defineFunction.getName(), feature, i, term);

            FieldName termName = FieldName.create(defineFunction.getName() + "(" + term + ")");

            Feature termFeature = new Feature(encoder, termName, dataType){

                @Override
                public ContinuousFeature toContinuousFeature() {
                    PMMLEncoder pmmlEncoder = this.ensureEncoder();

                    // Re-use the derived field, if it has been created by an earlier invocation
                    DerivedField derivedField = pmmlEncoder.getDerivedField(this.getName());
                    if (derivedField == null) {
                        derivedField = pmmlEncoder.createDerivedField(this.getName(), OpType.CONTINUOUS, this.getDataType(), apply);
                    }

                    return new ContinuousFeature(pmmlEncoder, derivedField);
                }
            };

            ids.add(termFeature.getName().getValue());
            result.add(termFeature);
        }

        return result;
    }

    /**
     * Creates the <code>tf(document, term)</code> user-defined function.
     *
     * @return A define function, whose body is a {@link TextIndex} element
     * that tokenizes the document using the separator regular expression of the tokenizer.
     * The local term weights are set to binary, if the {@code binary} attribute is set.
     *
     * @throws IllegalArgumentException If the analyzer is not <code>word</code>,
     * or if a preprocessor or an accent stripping mode has been configured.
     */
    public DefineFunction encodeDefineFunction() {
        String analyzer = this.getAnalyzer();
        Boolean binary = this.getBinary();
        Object preprocessor = this.getPreprocessor();
        String stripAccents = this.getStripAccents();
        Splitter tokenizer = this.getTokenizer();

        // Null-safe; a missing analyzer is reported the same way as an unsupported one
        if (!"word".equals(analyzer)) {
            throw new IllegalArgumentException(analyzer);
        }

        if (preprocessor != null) {
            throw new IllegalArgumentException("The preprocessor object (" + ClassDictUtil.formatClass(preprocessor) + ") is not supported");
        }

        if (stripAccents != null) {
            throw new IllegalArgumentException(stripAccents);
        }

        ParameterField documentField = new ParameterField(FieldName.create("document"));
        ParameterField termField = new ParameterField(FieldName.create("term"));

        TextIndex textIndex = new TextIndex(documentField.getName())
            .setTokenize(Boolean.TRUE)
            .setWordSeparatorCharacterRE(tokenizer.getSeparatorRE())
            // A null value leaves the attribute unset
            .setLocalTermWeights(binary.booleanValue() ? TextIndex.LocalTermWeights.BINARY : null)
            .setExpression(new FieldRef(termField.getName()));

        DefineFunction defineFunction = new DefineFunction("tf", OpType.CONTINUOUS, null)
            .setDataType(DataType.DOUBLE)
            .addParameterFields(documentField, termField)
            .setExpression(textIndex);

        return defineFunction;
    }

    /**
     * Creates an application of the term frequency function to a document feature and a term.
     *
     * @param function The name of the function.
     * @param feature The document feature.
     * @param index The index of the term in the vocabulary. Not used by this implementation.
     * @param term The term.
     *
     * @return An apply element with two arguments: a reference to the document feature, and the term as a string constant.
     */
    public Apply encodeApply(String function, Feature feature, int index, String term) {
        Constant constant = PMMLUtil.createConstant(term)
            .setDataType(DataType.STRING);

        return PMMLUtil.createApply(function, feature.ref(), constant);
    }

    public String getAnalyzer() {
        return (String)this.get("analyzer");
    }

    public Boolean getBinary() {
        return (Boolean)this.get("binary");
    }

    public Boolean getLowercase() {
        return (Boolean)this.get("lowercase");
    }

    public Object getPreprocessor() {
        return this.get("preprocessor");
    }

    public String getStripAccents() {
        return (String)this.get("strip_accents");
    }

    /**
     * @return The value of the {@code tokenizer} attribute.
     *
     * @throws IllegalArgumentException If the attribute is missing, or is not a {@link Splitter} object.
     */
    public Splitter getTokenizer() {
        Object tokenizer = this.get("tokenizer");

        // The instanceof test rejects both a null reference and an object of a wrong class
        if (!(tokenizer instanceof Splitter)) {
            throw new IllegalArgumentException("The tokenizer object (" + ClassDictUtil.formatClass(tokenizer) + ") is not Splitter");
        }

        return (Splitter)tokenizer;
    }

    public String getTokenPattern() {
        return (String)this.get("token_pattern");
    }

    /**
     * @return The value of the {@code vocabulary_} attribute; a mapping from terms to term indices.
     */
    @SuppressWarnings("unchecked")
    public Map<String, Scalar> getVocabulary() {
        // The element types cannot be verified here; they are checked when the entries are consumed
        return (Map<String, Scalar>)this.get("vocabulary_");
    }
}

