001/** 
002 * Copyright (c) 2009-2011, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.ml;
025
026import java.io.IOException;
027
028import org.apache.uima.UimaContext;
029import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
030import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
031import org.apache.uima.fit.descriptor.ConfigurationParameter;
032import org.apache.uima.fit.factory.initializable.Initializable;
033import org.apache.uima.fit.factory.initializable.InitializableFactory;
034import org.apache.uima.resource.ResourceInitializationException;
035import org.cleartk.ml.jar.DirectoryDataWriterFactory;
036import org.cleartk.ml.jar.GenericJarClassifierFactory;
037import org.cleartk.util.CleartkInitializationException;
038import org.cleartk.util.ReflectionUtil;
039
040/**
041 * <br>
042 * Copyright (c) 2009-2011, Regents of the University of Colorado <br>
043 * All rights reserved.
044 */
045public abstract class CleartkAnnotator<OUTCOME_TYPE> extends JCasAnnotator_ImplBase implements
046    Initializable {
047
048  public static final String PARAM_CLASSIFIER_FACTORY_CLASS_NAME = "classifierFactoryClassName";
049
050  private static final String DEFAULT_CLASSIFIER_FACTORY_CLASS_NAME = "org.cleartk.ml.jar.JarClassifierFactory";
051
052  @ConfigurationParameter(
053      name = PARAM_CLASSIFIER_FACTORY_CLASS_NAME,
054      mandatory = false,
055      description = "provides the full name of the ClassifierFactory class to be used.",
056      defaultValue = DEFAULT_CLASSIFIER_FACTORY_CLASS_NAME)
057  private String classifierFactoryClassName;
058
059  public static final String PARAM_DATA_WRITER_FACTORY_CLASS_NAME = "dataWriterFactoryClassName";
060
061  private static final String DEFAULT_DATA_WRITER_FACTORY_CLASS_NAME = "org.cleartk.ml.jar.DefaultDataWriterFactory";
062
063  @ConfigurationParameter(
064      name = PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
065      mandatory = false,
066      description = "provides the full name of the DataWriterFactory class to be used.",
067      defaultValue = DEFAULT_DATA_WRITER_FACTORY_CLASS_NAME)
068  private String dataWriterFactoryClassName;
069
070  public static final String PARAM_IS_TRAINING = "isTraining";
071
072  @ConfigurationParameter(
073      name = PARAM_IS_TRAINING,
074      mandatory = false,
075      description = "determines whether this annotator is writing training data or using a classifier to annotate. Normally inferred automatically based on whether or not a DataWriterFactory class has been set.")
076  private Boolean isTraining;
077
078  private boolean primitiveIsTraining;
079
080  protected Classifier<OUTCOME_TYPE> classifier;
081
082  protected DataWriter<OUTCOME_TYPE> dataWriter;
083
084  @Override
085  public void initialize(UimaContext context) throws ResourceInitializationException {
086    super.initialize(context);
087
088    if (dataWriterFactoryClassName == null && classifierFactoryClassName == null) {
089      CleartkInitializationException.neitherParameterSet(
090          PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
091          dataWriterFactoryClassName,
092          PARAM_CLASSIFIER_FACTORY_CLASS_NAME,
093          classifierFactoryClassName);
094    }
095
096    // determine whether we start out as training or predicting
097    if (this.isTraining != null) {
098      this.primitiveIsTraining = this.isTraining;
099    } else if (!DEFAULT_DATA_WRITER_FACTORY_CLASS_NAME.equals(this.dataWriterFactoryClassName)) {
100      this.primitiveIsTraining = true;
101    } else if (context.getConfigParameterValue(DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY) != null) {
102      this.primitiveIsTraining = true;
103    } else if (!DEFAULT_CLASSIFIER_FACTORY_CLASS_NAME.equals(this.classifierFactoryClassName)) {
104      this.primitiveIsTraining = false;
105    } else if (context.getConfigParameterValue(GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH) != null) {
106      this.primitiveIsTraining = false;
107    } else {
108      String message = "Please specify PARAM_IS_TRAINING - unable to infer it from context";
109      throw new IllegalArgumentException(message);
110    }
111
112    if (this.isTraining()) {
113      // create the factory and instantiate the data writer
114      DataWriterFactory<?> factory = InitializableFactory.create(
115          context,
116          dataWriterFactoryClassName,
117          DataWriterFactory.class);
118      DataWriter<?> untypedDataWriter;
119      try {
120        untypedDataWriter = factory.createDataWriter();
121      } catch (IOException e) {
122        throw new ResourceInitializationException(e);
123      }
124
125      InitializableFactory.initialize(untypedDataWriter, context);
126      this.dataWriter = ReflectionUtil.uncheckedCast(untypedDataWriter);
127    } else {
128      // create the factory and instantiate the classifier
129      ClassifierFactory<?> factory = InitializableFactory.create(
130          context,
131          classifierFactoryClassName,
132          ClassifierFactory.class);
133      Classifier<?> untypedClassifier;
134      try {
135        untypedClassifier = factory.createClassifier();
136      } catch (IOException e) {
137        throw new ResourceInitializationException(e);
138      }
139
140      this.classifier = ReflectionUtil.uncheckedCast(untypedClassifier);
141      ReflectionUtil.checkTypeParameterIsAssignable(
142          CleartkAnnotator.class,
143          "OUTCOME_TYPE",
144          this,
145          Classifier.class,
146          "OUTCOME_TYPE",
147          this.classifier);
148      InitializableFactory.initialize(untypedClassifier, context);
149    }
150  }
151
152  @Override
153  public void collectionProcessComplete() throws AnalysisEngineProcessException {
154    super.collectionProcessComplete();
155    if (isTraining()) {
156      dataWriter.finish();
157    }
158  }
159
160  protected boolean isTraining() {
161    return this.primitiveIsTraining;
162  }
163
164}