001/** 002 * Copyright (c) 2011, Regents of the University of Colorado 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions are met: 007 * 008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 011 * 012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 022 * POSSIBILITY OF SUCH DAMAGE. 023 */ 024package org.cleartk.ml.jar; 025 026import java.io.BufferedOutputStream; 027import java.io.File; 028import java.io.FileOutputStream; 029import java.io.IOException; 030import java.io.ObjectInputStream; 031import java.io.ObjectOutputStream; 032import java.io.OutputStream; 033import java.util.jar.JarInputStream; 034import java.util.jar.JarOutputStream; 035 036import org.cleartk.ml.encoder.features.FeaturesEncoder; 037import org.cleartk.ml.encoder.features.FeaturesEncoder_ImplBase; 038import org.cleartk.ml.encoder.outcome.OutcomeEncoder; 039import org.cleartk.util.ReflectionUtil; 040 041/** 042 * Superclass for builders which write to a training data file using {@link FeaturesEncoder}s and 043 * {@link OutcomeEncoder}s, and package classifiers as jar files. 044 * 045 * Subclasses will typically override: 046 * <ul> 047 * <li>{@link #saveToTrainingDirectory(File)} to add items to the model training directory</li> 048 * <li>{@link #packageClassifier(File, JarOutputStream)} to copy items to the classifier jar</li> 049 * <li>{@link #unpackageClassifier(JarInputStream)} to load items from the classifier jar</li> 050 * <li>{@link #newClassifier()} to create a classifier from the loaded attributes</li> 051 * </ul> 052 * 053 * <br> 054 * Copyright (c) 2011, Regents of the University of Colorado <br> 055 * All rights reserved. 056 * 057 * @author Steven Bethard 058 */ 059public abstract class EncodingJarClassifierBuilder<CLASSIFIER_TYPE, ENCODED_FEATURES_TYPE, OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> 060 extends JarClassifierBuilder<CLASSIFIER_TYPE> { 061 062 private static final String ENCODERS_FILE_NAME = FeaturesEncoder_ImplBase.ENCODERS_FILE_NAME; 063 064 public static File getEncodersFile(File dir) { 065 return new File(dir, ENCODERS_FILE_NAME); 066 } 067 068 protected FeaturesEncoder<ENCODED_FEATURES_TYPE> featuresEncoder; 069 070 public FeaturesEncoder<ENCODED_FEATURES_TYPE> getFeaturesEncoder() { 071 return featuresEncoder; 072 } 073 074 public void setFeaturesEncoder(FeaturesEncoder<ENCODED_FEATURES_TYPE> featuresEncoder) { 075 this.featuresEncoder = featuresEncoder; 076 } 077 078 protected OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> outcomeEncoder; 079 080 public OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> getOutcomeEncoder() { 081 return outcomeEncoder; 082 } 083 084 public void setOutcomeEncoder(OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> outcomeEncoder) { 085 this.outcomeEncoder = outcomeEncoder; 086 } 087 088 public abstract File getTrainingDataFile(File dir); 089 090 @Override 091 public void saveToTrainingDirectory(File dir) throws IOException { 092 super.saveToTrainingDirectory(dir); 093 // finalize the encoder feature set 094 this.featuresEncoder.finalizeFeatureSet(dir); 095 this.outcomeEncoder.finalizeOutcomeSet(dir); 096 097 // save the encoders to the directory 098 File encodersFile = getEncodersFile(dir); 099 OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(encodersFile)); 100 ObjectOutputStream os = new ObjectOutputStream(outputStream); 101 os.writeObject(this.featuresEncoder); 102 os.writeObject(this.outcomeEncoder); 103 os.close(); 104 outputStream.close(); 105 106 } 107 108 @Override 109 protected void packageClassifier(File dir, JarOutputStream modelStream) throws IOException { 110 super.packageClassifier(dir, modelStream); 111 JarStreams.putNextJarEntry(modelStream, ENCODERS_FILE_NAME, getEncodersFile(dir)); 112 } 113 114 @Override 115 protected void unpackageClassifier(JarInputStream modelStream) throws IOException { 116 super.unpackageClassifier(modelStream); 117 JarStreams.getNextJarEntry(modelStream, ENCODERS_FILE_NAME); 118 ObjectInputStream is = new ObjectInputStream(modelStream); 119 try { 120 this.featuresEncoder = this.featuresEncoderCast(is.readObject()); 121 this.outcomeEncoder = this.outcomeEncoderCast(is.readObject()); 122 } catch (ClassNotFoundException e) { 123 throw new RuntimeException("Classes not found for serialized encoder objects", e); 124 } 125 } 126 127 @SuppressWarnings("unchecked") 128 private FeaturesEncoder<ENCODED_FEATURES_TYPE> featuresEncoderCast(Object object) { 129 FeaturesEncoder<ENCODED_FEATURES_TYPE> encoder = (FeaturesEncoder<ENCODED_FEATURES_TYPE>) object; 130 131 ReflectionUtil.checkTypeParametersAreEqual( 132 EncodingJarClassifierBuilder.class, 133 "ENCODED_FEATURES_TYPE", 134 this, 135 FeaturesEncoder.class, 136 "ENCODED_FEATURES_TYPE", 137 encoder, 138 ClassCastException.class); 139 140 return encoder; 141 } 142 143 @SuppressWarnings("unchecked") 144 private OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> outcomeEncoderCast(Object object) { 145 OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE> encoder; 146 encoder = (OutcomeEncoder<OUTCOME_TYPE, ENCODED_OUTCOME_TYPE>) object; 147 148 ReflectionUtil.checkTypeParametersAreEqual( 149 EncodingJarClassifierBuilder.class, 150 "OUTCOME_TYPE", 151 this, 152 OutcomeEncoder.class, 153 "OUTCOME_TYPE", 154 encoder, 155 ClassCastException.class); 156 157 ReflectionUtil.checkTypeParametersAreEqual( 158 EncodingJarClassifierBuilder.class, 159 "ENCODED_OUTCOME_TYPE", 160 this, 161 OutcomeEncoder.class, 162 "ENCODED_OUTCOME_TYPE", 163 encoder, 164 ClassCastException.class); 165 166 return encoder; 167 } 168}