Package sklearn.feature_extraction.text
Class CountVectorizer
- java.lang.Object
-
- java.util.AbstractMap<K,V>
-
- java.util.HashMap<java.lang.String,java.lang.Object>
-
- net.razorvine.pickle.objects.ClassDict
-
- org.jpmml.python.PythonObject
-
- sklearn.Step
-
- sklearn.Transformer
-
- sklearn.SkLearnTransformer
-
- sklearn.feature_extraction.text.CountVectorizer
-
- All Implemented Interfaces:
java.io.Serializable, java.lang.Cloneable, java.util.Map<java.lang.String,java.lang.Object>, Encodable, HasFeatureNamesIn, HasNumberOfFeatures, HasSparseOutput, HasType, HasPMMLName<Transformer>
- Direct Known Subclasses:
TfidfVectorizer
public class CountVectorizer extends SkLearnTransformer implements HasSparseOutput
- See Also:
- Serialized Form
-
-
Field Summary
Fields
Modifier and Type | Field | Description
static java.lang.String | TOKEN_PATTERN |
-
Fields inherited from interface sklearn.HasNumberOfFeatures
UNKNOWN
-
-
Constructor Summary
Constructors
Constructor | Description
CountVectorizer(java.lang.String module, java.lang.String name) |
-
Method Summary
All Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
org.dmg.pmml.Apply | encodeApply(org.dmg.pmml.DefineFunction defineFunction, org.jpmml.converter.Feature feature, int index, java.lang.String term) |
org.dmg.pmml.DefineFunction | encodeDefineFunction(org.jpmml.converter.Feature feature, SkLearnEncoder encoder) |
java.util.List<org.jpmml.converter.Feature> | encodeFeatures(java.util.List<org.jpmml.converter.Feature> features, SkLearnEncoder encoder) |
java.lang.String | functionName() |
java.lang.String | getAnalyzer() |
java.lang.Boolean | getBinary() |
org.dmg.pmml.DataType | getDataType() |
org.jpmml.python.TypeInfo | getDType() |
java.lang.Boolean | getLowercase() |
java.lang.Object[] | getNGramRange() |
int | getNumberOfFeatures() |
org.dmg.pmml.OpType | getOpType() |
java.lang.Object | getPreprocessor() |
java.lang.Boolean | getSparseOutput() |
java.util.List<java.lang.String> | getStopWords() |
java.lang.String | getStripAccents() |
Tokenizer | getTokenizer() |
java.lang.String | getTokenPattern() |
java.util.Map<java.lang.String,?> | getVocabulary() |
-
Methods inherited from class sklearn.SkLearnTransformer
encodePMML
-
Methods inherited from class sklearn.Transformer
checkFeatures, createFieldName, createFieldName, encode, getDType, getOptionalDType, refineWildcardFeature, setPMMLName, updateDataField, updateFeatures
-
Methods inherited from class sklearn.Step
checkSkLearnVersion, checkVersion, getFeatureNamesIn, getPMMLName, getSkLearnVersion
-
Methods inherited from class org.jpmml.python.PythonObject
containsKey, delattr, get, get, getArray, getArray, getArray, getArrayList, getArrayList, getArrayShape, getArrayShape, getattr, getattr, getBoolean, getBooleanArray, getClassName, getDict, getEnum, getEnumList, getIdentifiable, getInteger, getIntegerArray, getList, getList, getListLike, getListLike, getNumber, getNumberArray, getObject, getObjectArray, getObjectList, getOptional, getOptionalBoolean, getOptionalBoolean, getOptionalDict, getOptionalEnum, getOptionalIdentifiable, getOptionalInteger, getOptionalNumber, getOptionalObject, getOptionalScalar, getOptionalString, getOptionalTuple, getPythonModule, getPythonName, getScalar, getString, getStringArray, getStringList, getTuple, getTupleList, hasattr, put, putAll, remove, setattr, setClassName, update
-
Methods inherited from class java.util.HashMap
clear, clone, compute, computeIfAbsent, computeIfPresent, containsValue, entrySet, forEach, getOrDefault, isEmpty, keySet, merge, putIfAbsent, remove, replace, replace, replaceAll, size, values
-
Methods inherited from interface sklearn.HasFeatureNamesIn
getFeatureNamesIn
-
Methods inherited from interface sklearn2pmml.HasPMMLName
getPMMLName
-
-
-
-
Field Detail
-
TOKEN_PATTERN
public static final java.lang.String TOKEN_PATTERN
- See Also:
- Constant Field Values
-
-
Method Detail
-
getNumberOfFeatures
public int getNumberOfFeatures()
- Specified by:
getNumberOfFeatures in interface HasNumberOfFeatures
- Overrides:
getNumberOfFeatures in class Transformer
-
getOpType
public org.dmg.pmml.OpType getOpType()
- Specified by:
getOpType in interface HasType
- Overrides:
getOpType in class Transformer
-
getDataType
public org.dmg.pmml.DataType getDataType()
- Specified by:
getDataType in interface HasType
- Overrides:
getDataType in class Transformer
-
encodeFeatures
public java.util.List<org.jpmml.converter.Feature> encodeFeatures(java.util.List<org.jpmml.converter.Feature> features, SkLearnEncoder encoder)
- Specified by:
encodeFeatures in class Transformer
-
encodeDefineFunction
public org.dmg.pmml.DefineFunction encodeDefineFunction(org.jpmml.converter.Feature feature, SkLearnEncoder encoder)
-
encodeApply
public org.dmg.pmml.Apply encodeApply(org.dmg.pmml.DefineFunction defineFunction, org.jpmml.converter.Feature feature, int index, java.lang.String term)
-
functionName
public java.lang.String functionName()
-
getAnalyzer
public java.lang.String getAnalyzer()
-
getBinary
public java.lang.Boolean getBinary()
-
getDType
public org.jpmml.python.TypeInfo getDType()
-
getLowercase
public java.lang.Boolean getLowercase()
-
getNGramRange
public java.lang.Object[] getNGramRange()
-
getPreprocessor
public java.lang.Object getPreprocessor()
-
getSparseOutput
public java.lang.Boolean getSparseOutput()
- Specified by:
getSparseOutput in interface HasSparseOutput
-
getStopWords
public java.util.List<java.lang.String> getStopWords()
-
getStripAccents
public java.lang.String getStripAccents()
-
getTokenizer
public Tokenizer getTokenizer()
-
getTokenPattern
public java.lang.String getTokenPattern()
- See Also:
TOKEN_PATTERN
-
getVocabulary
public java.util.Map<java.lang.String,?> getVocabulary()
-
-