Package edu.columbia.tjw.item.spark
Class ItemClassifier
- java.lang.Object
-
- org.apache.spark.ml.PipelineStage
-
- org.apache.spark.ml.Estimator<M>
-
- org.apache.spark.ml.Predictor<FeaturesType,E,M>
-
- org.apache.spark.ml.classification.Classifier<FeaturesType,E,M>
-
- org.apache.spark.ml.classification.ProbabilisticClassifier<org.apache.spark.ml.linalg.Vector,ItemClassifier,ItemClassificationModel>
-
- edu.columbia.tjw.item.spark.ItemClassifier
-
- All Implemented Interfaces:
Serializable, Cloneable, org.apache.spark.internal.Logging, org.apache.spark.ml.classification.ClassifierParams, org.apache.spark.ml.classification.ProbabilisticClassifierParams, org.apache.spark.ml.param.Params, org.apache.spark.ml.param.shared.HasFeaturesCol, org.apache.spark.ml.param.shared.HasLabelCol, org.apache.spark.ml.param.shared.HasPredictionCol, org.apache.spark.ml.param.shared.HasProbabilityCol, org.apache.spark.ml.param.shared.HasRawPredictionCol, org.apache.spark.ml.param.shared.HasThresholds, org.apache.spark.ml.PredictorParams, org.apache.spark.ml.util.Identifiable, scala.Serializable
public class ItemClassifier extends org.apache.spark.ml.classification.ProbabilisticClassifier<org.apache.spark.ml.linalg.Vector,ItemClassifier,ItemClassificationModel> implements Cloneable
- Author:
- tyler
- See Also:
- Serialized Form
-
-
Constructor Summary
Constructors
Constructor | Description
- ItemClassifier(ItemClassifierSettings settings_)
- ItemClassifier(ItemClassifierSettings settings_, edu.columbia.tjw.item.ItemParameters<edu.columbia.tjw.item.base.SimpleStatus,edu.columbia.tjw.item.base.SimpleRegressor,edu.columbia.tjw.item.base.StandardCurveType> startingParams_)
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
- edu.columbia.tjw.item.fit.FitResult<edu.columbia.tjw.item.base.SimpleStatus,edu.columbia.tjw.item.base.SimpleRegressor,edu.columbia.tjw.item.base.StandardCurveType> | computeFitResult(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel model_)
- edu.columbia.tjw.item.fit.GradientResult | computeGradients(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel model_)
- ItemClassifier | copy(org.apache.spark.ml.param.ParamMap paramMap_)
- edu.columbia.tjw.item.base.raw.RawFittingGrid<edu.columbia.tjw.item.base.SimpleStatus,edu.columbia.tjw.item.base.SimpleRegressor> | generateMaterializedGrid(org.apache.spark.sql.Dataset<?> data_)
- ItemClassifierSettings | getSettings()
- static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | prepareData(org.apache.spark.sql.Dataset<?> data_, ItemClassifierSettings settings_, String featuresColumn_)
- static ItemClassifierSettings | prepareSettings(org.apache.spark.sql.Dataset<?> data_, String toStatusColumn_, List<String> featureList, Set<String> curveRegressors_, int maxParamCount_)
- static ItemClassifierSettings | prepareSettings(org.apache.spark.sql.Dataset<?> data_, String toStatusColumn_, List<String> featureList, Set<String> curveRegressors_, int maxParamCount_, edu.columbia.tjw.item.ItemSettings settings_)
- ItemClassificationModel | retrainModel(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel prevModel_)
- ItemClassificationModel | runAnnealing(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel prevModel_)
- ItemClassificationModel | train(org.apache.spark.sql.Dataset<?> data_)
- String | uid()
-
Methods inherited from class org.apache.spark.ml.classification.ProbabilisticClassifier
getProbabilityCol, getThresholds, org$apache$spark$ml$param$shared$HasProbabilityCol$_setter_$probabilityCol_$eq, org$apache$spark$ml$param$shared$HasThresholds$_setter_$thresholds_$eq, probabilityCol, setProbabilityCol, setThresholds, thresholds, validateAndTransformSchema
-
Methods inherited from class org.apache.spark.ml.classification.Classifier
extractLabeledPoints, getNumClasses, getNumClasses$default$2, getRawPredictionCol, org$apache$spark$ml$param$shared$HasRawPredictionCol$_setter_$rawPredictionCol_$eq, rawPredictionCol, setRawPredictionCol
-
Methods inherited from class org.apache.spark.ml.Predictor
extractLabeledPoints, featuresCol, featuresDataType, fit, getFeaturesCol, getLabelCol, getPredictionCol, labelCol, org$apache$spark$ml$param$shared$HasFeaturesCol$_setter_$featuresCol_$eq, org$apache$spark$ml$param$shared$HasLabelCol$_setter_$labelCol_$eq, org$apache$spark$ml$param$shared$HasPredictionCol$_setter_$predictionCol_$eq, predictionCol, setFeaturesCol, setLabelCol, setPredictionCol, transformSchema
-
Methods inherited from class org.apache.spark.ml.PipelineStage
$, clear, copyValues, copyValues$default$2, defaultCopy, defaultParamMap, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, initializeLogIfNecessary, initializeLogIfNecessary, initializeLogIfNecessary$default$2, isDefined, isSet, isTraceEnabled, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning, org$apache$spark$internal$Logging$$log_, org$apache$spark$internal$Logging$$log__$eq, org$apache$spark$ml$param$Params$_setter_$defaultParamMap_$eq, org$apache$spark$ml$param$Params$_setter_$paramMap_$eq, paramMap, params, set, set, set, setDefault, setDefault, toString, transformSchema
-
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
-
Methods inherited from interface org.apache.spark.ml.param.shared.HasFeaturesCol
featuresCol, getFeaturesCol, org$apache$spark$ml$param$shared$HasFeaturesCol$_setter_$featuresCol_$eq
-
Methods inherited from interface org.apache.spark.ml.param.shared.HasLabelCol
getLabelCol, labelCol, org$apache$spark$ml$param$shared$HasLabelCol$_setter_$labelCol_$eq
-
Methods inherited from interface org.apache.spark.ml.param.shared.HasPredictionCol
getPredictionCol, org$apache$spark$ml$param$shared$HasPredictionCol$_setter_$predictionCol_$eq, predictionCol
-
Methods inherited from interface org.apache.spark.ml.param.shared.HasRawPredictionCol
getRawPredictionCol, org$apache$spark$ml$param$shared$HasRawPredictionCol$_setter_$rawPredictionCol_$eq, rawPredictionCol
-
Methods inherited from interface org.apache.spark.ml.param.Params
$, clear, copyValues, copyValues$default$2, defaultCopy, defaultParamMap, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, isDefined, isSet, org$apache$spark$ml$param$Params$_setter_$defaultParamMap_$eq, org$apache$spark$ml$param$Params$_setter_$paramMap_$eq, paramMap, params, set, set, set, setDefault, setDefault
-
-
-
-
Constructor Detail
-
ItemClassifier
public ItemClassifier(ItemClassifierSettings settings_)
-
ItemClassifier
public ItemClassifier(ItemClassifierSettings settings_, edu.columbia.tjw.item.ItemParameters<edu.columbia.tjw.item.base.SimpleStatus,edu.columbia.tjw.item.base.SimpleRegressor,edu.columbia.tjw.item.base.StandardCurveType> startingParams_)
-
-
Method Detail
-
copy
public ItemClassifier copy(org.apache.spark.ml.param.ParamMap paramMap_)
- Specified by:
copy in interface org.apache.spark.ml.param.Params
- Specified by:
copy in class org.apache.spark.ml.Predictor<org.apache.spark.ml.linalg.Vector,ItemClassifier,ItemClassificationModel>
-
getSettings
public ItemClassifierSettings getSettings()
-
generateMaterializedGrid
public edu.columbia.tjw.item.base.raw.RawFittingGrid<edu.columbia.tjw.item.base.SimpleStatus,edu.columbia.tjw.item.base.SimpleRegressor> generateMaterializedGrid(org.apache.spark.sql.Dataset<?> data_)
-
prepareData
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> prepareData(org.apache.spark.sql.Dataset<?> data_, ItemClassifierSettings settings_, String featuresColumn_)
-
prepareSettings
public static ItemClassifierSettings prepareSettings(org.apache.spark.sql.Dataset<?> data_, String toStatusColumn_, List<String> featureList, Set<String> curveRegressors_, int maxParamCount_)
-
prepareSettings
public static ItemClassifierSettings prepareSettings(org.apache.spark.sql.Dataset<?> data_, String toStatusColumn_, List<String> featureList, Set<String> curveRegressors_, int maxParamCount_, edu.columbia.tjw.item.ItemSettings settings_)
-
computeGradients
public edu.columbia.tjw.item.fit.GradientResult computeGradients(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel model_)
-
computeFitResult
public edu.columbia.tjw.item.fit.FitResult<edu.columbia.tjw.item.base.SimpleStatus,edu.columbia.tjw.item.base.SimpleRegressor,edu.columbia.tjw.item.base.StandardCurveType> computeFitResult(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel model_)
-
runAnnealing
public ItemClassificationModel runAnnealing(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel prevModel_)
-
retrainModel
public ItemClassificationModel retrainModel(org.apache.spark.sql.Dataset<?> data_, ItemClassificationModel prevModel_)
-
train
public ItemClassificationModel train(org.apache.spark.sql.Dataset<?> data_)
- Specified by:
train in class org.apache.spark.ml.Predictor<org.apache.spark.ml.linalg.Vector,ItemClassifier,ItemClassificationModel>
-
uid
public String uid()
- Specified by:
uid in interface org.apache.spark.ml.util.Identifiable
-
-