001/*
002 * Copyright (c) 2011, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.timeml.util;
025
import java.io.File;
import java.net.URL;
import java.util.Locale;
import java.util.MissingResourceException;

import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.ResourceCreationSpecifierFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.DataWriter;
import org.cleartk.ml.SequenceDataWriter;
import org.cleartk.ml.jar.DefaultDataWriterFactory;
import org.cleartk.ml.jar.DefaultSequenceDataWriterFactory;
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
import org.cleartk.ml.jar.GenericJarClassifierFactory;
import org.cleartk.ml.jar.JarClassifierBuilder;
040
041/**
042 * 
043 * <br>
044 * Copyright (c) 2011, Regents of the University of Colorado <br>
045 * All rights reserved.
046 * 
047 * @author Steven Bethard
048 */
049public abstract class CleartkInternalModelFactory {
050
051  public abstract AnalysisEngineDescription getBaseDescription()
052      throws ResourceInitializationException;
053
054  public abstract Class<?> getAnnotatorClass();
055
056  public abstract Class<?> getDataWriterClass();
057
058  public File getTrainingDirectory() {
059    String path = this.getAnnotatorClass().getName().toLowerCase().replace('.', '/');
060    return new File("src/main/resources/" + path);
061  }
062
063  public URL getClassifierJarURL() {
064    String dirName = getAnnotatorClass().getSimpleName().toLowerCase();
065    File resourceFile = JarClassifierBuilder.getModelJarFile(dirName);
066    String resourceName = resourceFile.getPath().replaceAll("\\\\", "/");
067    URL url = this.getAnnotatorClass().getResource(resourceName);
068    if (url == null) {
069      String className = this.getAnnotatorClass().getName();
070      String format = "No classifier jar found at \"%s\" for class %s";
071      String message = String.format(format, resourceName, className);
072      throw new MissingResourceException(message, className, resourceName);
073    }
074    return url;
075  }
076
077  public AnalysisEngineDescription getWriterDescription(File outputDirectory)
078      throws ResourceInitializationException {
079    Class<?> dataWriterClass = this.getDataWriterClass();
080    String paramName;
081    if (SequenceDataWriter.class.isAssignableFrom(dataWriterClass)) {
082      paramName = DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME;
083    } else if (DataWriter.class.isAssignableFrom(dataWriterClass)) {
084      paramName = DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME;
085    } else {
086      throw new RuntimeException("Invalid data writer class: " + dataWriterClass);
087    }
088    AnalysisEngineDescription desc = getBaseDescription();
089    ResourceCreationSpecifierFactory.setConfigurationParameters(
090        desc,
091        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
092        outputDirectory.getPath(),
093        paramName,
094        dataWriterClass.getName());
095    return desc;
096  }
097
098  public AnalysisEngineDescription getWriterDescription() throws ResourceInitializationException {
099    return getWriterDescription(this.getTrainingDirectory());
100  }
101
102  public AnalysisEngineDescription getAnnotatorDescription(String modelFileName)
103      throws ResourceInitializationException {
104    AnalysisEngineDescription desc = getBaseDescription();
105    ResourceCreationSpecifierFactory.setConfigurationParameters(
106        desc,
107        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
108        modelFileName);
109    return desc;
110  }
111
112  public AnalysisEngineDescription getAnnotatorDescription() throws ResourceInitializationException {
113    return getAnnotatorDescription(this.getClassifierJarURL().toString());
114  }
115}