001/** 002 * Copyright (c) 2009-2011, Regents of the University of Colorado 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions are met: 007 * 008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 011 * 012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 022 * POSSIBILITY OF SUCH DAMAGE. 023 */ 024package org.cleartk.ml; 025 026import java.io.IOException; 027 028import org.apache.uima.UimaContext; 029import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 030import org.apache.uima.fit.component.JCasAnnotator_ImplBase; 031import org.apache.uima.fit.descriptor.ConfigurationParameter; 032import org.apache.uima.fit.factory.initializable.Initializable; 033import org.apache.uima.fit.factory.initializable.InitializableFactory; 034import org.apache.uima.resource.ResourceInitializationException; 035import org.cleartk.ml.jar.DirectoryDataWriterFactory; 036import org.cleartk.ml.jar.GenericJarClassifierFactory; 037import org.cleartk.util.CleartkInitializationException; 038import org.cleartk.util.ReflectionUtil; 039 040/** 041 * <br> 042 * Copyright (c) 2009-2011, Regents of the University of Colorado <br> 043 * All rights reserved. 044 */ 045public abstract class CleartkAnnotator<OUTCOME_TYPE> extends JCasAnnotator_ImplBase implements 046 Initializable { 047 048 public static final String PARAM_CLASSIFIER_FACTORY_CLASS_NAME = "classifierFactoryClassName"; 049 050 private static final String DEFAULT_CLASSIFIER_FACTORY_CLASS_NAME = "org.cleartk.ml.jar.JarClassifierFactory"; 051 052 @ConfigurationParameter( 053 name = PARAM_CLASSIFIER_FACTORY_CLASS_NAME, 054 mandatory = false, 055 description = "provides the full name of the ClassifierFactory class to be used.", 056 defaultValue = DEFAULT_CLASSIFIER_FACTORY_CLASS_NAME) 057 private String classifierFactoryClassName; 058 059 public static final String PARAM_DATA_WRITER_FACTORY_CLASS_NAME = "dataWriterFactoryClassName"; 060 061 private static final String DEFAULT_DATA_WRITER_FACTORY_CLASS_NAME = "org.cleartk.ml.jar.DefaultDataWriterFactory"; 062 063 @ConfigurationParameter( 064 name = PARAM_DATA_WRITER_FACTORY_CLASS_NAME, 065 mandatory = false, 066 description = "provides the full name of the DataWriterFactory class to be used.", 067 defaultValue = DEFAULT_DATA_WRITER_FACTORY_CLASS_NAME) 068 private String dataWriterFactoryClassName; 069 070 public static final String PARAM_IS_TRAINING = "isTraining"; 071 072 @ConfigurationParameter( 073 name = PARAM_IS_TRAINING, 074 mandatory = false, 075 description = "determines whether this annotator is writing training data or using a classifier to annotate. Normally inferred automatically based on whether or not a DataWriterFactory class has been set.") 076 private Boolean isTraining; 077 078 private boolean primitiveIsTraining; 079 080 protected Classifier<OUTCOME_TYPE> classifier; 081 082 protected DataWriter<OUTCOME_TYPE> dataWriter; 083 084 @Override 085 public void initialize(UimaContext context) throws ResourceInitializationException { 086 super.initialize(context); 087 088 if (dataWriterFactoryClassName == null && classifierFactoryClassName == null) { 089 CleartkInitializationException.neitherParameterSet( 090 PARAM_DATA_WRITER_FACTORY_CLASS_NAME, 091 dataWriterFactoryClassName, 092 PARAM_CLASSIFIER_FACTORY_CLASS_NAME, 093 classifierFactoryClassName); 094 } 095 096 // determine whether we start out as training or predicting 097 if (this.isTraining != null) { 098 this.primitiveIsTraining = this.isTraining; 099 } else if (!DEFAULT_DATA_WRITER_FACTORY_CLASS_NAME.equals(this.dataWriterFactoryClassName)) { 100 this.primitiveIsTraining = true; 101 } else if (context.getConfigParameterValue(DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY) != null) { 102 this.primitiveIsTraining = true; 103 } else if (!DEFAULT_CLASSIFIER_FACTORY_CLASS_NAME.equals(this.classifierFactoryClassName)) { 104 this.primitiveIsTraining = false; 105 } else if (context.getConfigParameterValue(GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH) != null) { 106 this.primitiveIsTraining = false; 107 } else { 108 String message = "Please specify PARAM_IS_TRAINING - unable to infer it from context"; 109 throw new IllegalArgumentException(message); 110 } 111 112 if (this.isTraining()) { 113 // create the factory and instantiate the data writer 114 DataWriterFactory<?> factory = InitializableFactory.create( 115 context, 116 dataWriterFactoryClassName, 117 DataWriterFactory.class); 118 DataWriter<?> untypedDataWriter; 119 try { 120 untypedDataWriter = factory.createDataWriter(); 121 } catch (IOException e) { 122 throw new ResourceInitializationException(e); 123 } 124 125 InitializableFactory.initialize(untypedDataWriter, context); 126 this.dataWriter = ReflectionUtil.uncheckedCast(untypedDataWriter); 127 } else { 128 // create the factory and instantiate the classifier 129 ClassifierFactory<?> factory = InitializableFactory.create( 130 context, 131 classifierFactoryClassName, 132 ClassifierFactory.class); 133 Classifier<?> untypedClassifier; 134 try { 135 untypedClassifier = factory.createClassifier(); 136 } catch (IOException e) { 137 throw new ResourceInitializationException(e); 138 } 139 140 this.classifier = ReflectionUtil.uncheckedCast(untypedClassifier); 141 ReflectionUtil.checkTypeParameterIsAssignable( 142 CleartkAnnotator.class, 143 "OUTCOME_TYPE", 144 this, 145 Classifier.class, 146 "OUTCOME_TYPE", 147 this.classifier); 148 InitializableFactory.initialize(untypedClassifier, context); 149 } 150 } 151 152 @Override 153 public void collectionProcessComplete() throws AnalysisEngineProcessException { 154 super.collectionProcessComplete(); 155 if (isTraining()) { 156 dataWriter.finish(); 157 } 158 } 159 160 protected boolean isTraining() { 161 return this.primitiveIsTraining; 162 } 163 164}