001/** 
002 * Copyright (c) 2011, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.ml.jar;
025
026import java.io.BufferedOutputStream;
027import java.io.File;
028import java.io.FileOutputStream;
029import java.io.IOException;
030import java.io.ObjectInputStream;
031import java.io.ObjectOutputStream;
032import java.io.OutputStream;
033import java.util.jar.JarInputStream;
034import java.util.jar.JarOutputStream;
035
036import org.cleartk.ml.encoder.features.FeaturesEncoder;
037import org.cleartk.ml.encoder.features.FeaturesEncoder_ImplBase;
038import org.cleartk.ml.encoder.outcome.OutcomeEncoder;
039import org.cleartk.util.ReflectionUtil;
040
041/**
042 * Superclass for builders which write to a training data file using {@link FeaturesEncoder}s and
043 * {@link OutcomeEncoder}s, and package classifiers as jar files.
044 * 
045 * Subclasses will typically override:
046 * <ul>
047 * <li>{@link #saveToTrainingDirectory(File)} to add items to the model training directory</li>
048 * <li>{@link #packageClassifier(File, JarOutputStream)} to copy items to the classifier jar</li>
049 * <li>{@link #unpackageClassifier(JarInputStream)} to load items from the classifier jar</li>
050 * <li>{@link #newClassifier()} to create a classifier from the loaded attributes</li>
051 * </ul>
052 * 
053 * <br>
054 * Copyright (c) 2011, Regents of the University of Colorado <br>
055 * All rights reserved.
056 * 
057 * @author Steven Bethard
058 */
059public abstract class EncodingJarClassifierBuilder<CLASSIFIER_TYPE, ENCODED_FEATURES_TYPE, OUTCOME_TYPE, ENCODED_OUTCOME_TYPE>
060    extends JarClassifierBuilder<CLASSIFIER_TYPE> {
061
062  private static final String ENCODERS_FILE_NAME = FeaturesEncoder_ImplBase.ENCODERS_FILE_NAME;
063
064  public static File getEncodersFile(File dir) {
065    return new File(dir, ENCODERS_FILE_NAME);
066  }
067
068  protected FeaturesEncoder<ENCODED_FEATURES_TYPE> featuresEncoder;
069
070  public FeaturesEncoder<ENCODED_FEATURES_TYPE> getFeaturesEncoder() {
071    return featuresEncoder;
072  }
073
074  public void setFeaturesEncoder(FeaturesEncoder<ENCODED_FEATURES_TYPE> featuresEncoder) {
075    this.featuresEncoder = featuresEncoder;
076  }
077
078  protected OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> outcomeEncoder;
079
080  public OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> getOutcomeEncoder() {
081    return outcomeEncoder;
082  }
083
084  public void setOutcomeEncoder(OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> outcomeEncoder) {
085    this.outcomeEncoder = outcomeEncoder;
086  }
087
088  public abstract File getTrainingDataFile(File dir);
089
090  @Override
091  public void saveToTrainingDirectory(File dir) throws IOException {
092    super.saveToTrainingDirectory(dir);
093    // finalize the encoder feature set
094    this.featuresEncoder.finalizeFeatureSet(dir);
095    this.outcomeEncoder.finalizeOutcomeSet(dir);
096
097    // save the encoders to the directory
098    File encodersFile = getEncodersFile(dir);
099    OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(encodersFile));
100    ObjectOutputStream os = new ObjectOutputStream(outputStream);
101    os.writeObject(this.featuresEncoder);
102    os.writeObject(this.outcomeEncoder);
103    os.close();
104    outputStream.close();
105
106  }
107
108  @Override
109  protected void packageClassifier(File dir, JarOutputStream modelStream) throws IOException {
110    super.packageClassifier(dir, modelStream);
111    JarStreams.putNextJarEntry(modelStream, ENCODERS_FILE_NAME, getEncodersFile(dir));
112  }
113
114  @Override
115  protected void unpackageClassifier(JarInputStream modelStream) throws IOException {
116    super.unpackageClassifier(modelStream);
117    JarStreams.getNextJarEntry(modelStream, ENCODERS_FILE_NAME);
118    ObjectInputStream is = new ObjectInputStream(modelStream);
119    try {
120      this.featuresEncoder = this.featuresEncoderCast(is.readObject());
121      this.outcomeEncoder = this.outcomeEncoderCast(is.readObject());
122    } catch (ClassNotFoundException e) {
123      throw new RuntimeException("Classes not found for serialized encoder objects", e);
124    }
125  }
126
127  @SuppressWarnings("unchecked")
128  private FeaturesEncoder<ENCODED_FEATURES_TYPE> featuresEncoderCast(Object object) {
129    FeaturesEncoder<ENCODED_FEATURES_TYPE> encoder = (FeaturesEncoder<ENCODED_FEATURES_TYPE>) object;
130
131    ReflectionUtil.checkTypeParametersAreEqual(
132        EncodingJarClassifierBuilder.class,
133        "ENCODED_FEATURES_TYPE",
134        this,
135        FeaturesEncoder.class,
136        "ENCODED_FEATURES_TYPE",
137        encoder,
138        ClassCastException.class);
139
140    return encoder;
141  }
142
143  @SuppressWarnings("unchecked")
144  private OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> outcomeEncoderCast(Object object) {
145    OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> encoder;
146    encoder = (OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE>) object;
147
148    ReflectionUtil.checkTypeParametersAreEqual(
149        EncodingJarClassifierBuilder.class,
150        "OUTCOME_TYPE",
151        this,
152        OutcomeEncoder.class,
153        "OUTCOME_TYPE",
154        encoder,
155        ClassCastException.class);
156
157    ReflectionUtil.checkTypeParametersAreEqual(
158        EncodingJarClassifierBuilder.class,
159        "ENCODED_OUTCOME_TYPE",
160        this,
161        OutcomeEncoder.class,
162        "ENCODED_OUTCOME_TYPE",
163        encoder,
164        ClassCastException.class);
165
166    return encoder;
167  }
168}