/**
 * Copyright (c) 2007-2008, Regents of the University of Colorado
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package org.cleartk.ml.viterbi;

import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.uima.UimaContext;
import org.apache.uima.fit.component.initialize.ConfigurationParameterInitializer;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.initializable.Initializable;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.Classifier;
import org.cleartk.ml.CleartkProcessingException;
import org.cleartk.ml.Feature;
import org.cleartk.ml.SequenceClassifier;
import org.cleartk.util.CleartkInitializationException;
import org.cleartk.util.ReflectionUtil;
import org.cleartk.util.ReflectionUtil.TypeArgumentDelegator;

import com.google.common.base.Functions;
import com.google.common.base.Objects;
import com.google.common.base.Objects.ToStringHelper;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Doubles;

/**
 * A {@link SequenceClassifier} that labels a sequence by repeatedly calling a delegated
 * single-instance {@link Classifier}, feeding features derived from the outcomes chosen so far
 * (via the configured {@link OutcomeFeatureExtractor}s) into each subsequent call.
 * <p>
 * When the stack size is 1 the sequence is labelled greedily, left to right, using
 * {@link Classifier#classify}. For any other stack size the {@link #viterbi(List)} search is
 * used, which relies on the delegated classifier's {@code score} method.
 *
 * <br>
 * Copyright (c) 2007-2008, Regents of the University of Colorado <br>
 * All rights reserved.
 */

public class ViterbiClassifier<OUTCOME_TYPE> implements SequenceClassifier<OUTCOME_TYPE>,
    Initializable, TypeArgumentDelegator {

  /** The single-instance classifier that produces (and scores) the per-element outcomes. */
  protected Classifier<OUTCOME_TYPE> delegatedClassifier;

  /** Extractors that turn the outcomes chosen so far into features for the next element. */
  protected OutcomeFeatureExtractor[] outcomeFeatureExtractors;

  public static final String PARAM_STACK_SIZE = "stackSize";

  @ConfigurationParameter(
      name = PARAM_STACK_SIZE,
      description = "specifies the maximum number of candidate paths to "
          + "keep track of. In general, this number should be higher than the number "
          + "of possible classifications at any given point in the sequence. This "
          + "guarantees that highest-possible scoring sequence will be returned. If, "
          + "however, the number of possible classifications is quite high and/or you "
          + "are concerned about throughput performance, then you may want to reduce the number "
          + "of candidate paths to maintain. If Classifier.score is not implemented for the given delegated classifier, then "
          + "the value of this parameter must be 1. ",
      defaultValue = "1")
  protected int stackSize;

  public static final String PARAM_ADD_SCORES = "addScores";

  @ConfigurationParameter(
      name = PARAM_ADD_SCORES,
      description = "specifies whether the scores of candidate sequence classifications should be "
          + "calculated by summing classfication scores for each member of the sequence or by multiplying them. A value of "
          + "true means that the scores will be summed. A value of false means that the scores will be multiplied. ",
      defaultValue = "false")
  protected boolean addScores = false;

  /**
   * Creates a sequence classifier on top of a single-instance classifier.
   *
   * @param delegatedClassifier
   *          the classifier used to classify/score each element of a sequence.
   * @param outcomeFeatureExtractors
   *          the extractors used to derive features from previously assigned outcomes.
   */
  public ViterbiClassifier(
      Classifier<OUTCOME_TYPE> delegatedClassifier,
      OutcomeFeatureExtractor[] outcomeFeatureExtractors) {
    this.delegatedClassifier = delegatedClassifier;
    this.outcomeFeatureExtractors = outcomeFeatureExtractors;
  }

  /**
   * Populates the configuration parameters of this object from the UIMA context and validates
   * them.
   *
   * @param context
   *          the context holding the configuration parameter values.
   * @throws ResourceInitializationException
   *           if the configured stack size is less than 1.
   */
  public void initialize(UimaContext context) throws ResourceInitializationException {
    ConfigurationParameterInitializer.initialize(this, context);
    if (stackSize < 1) {
      throw CleartkInitializationException.parameterLessThan(PARAM_STACK_SIZE, 1, stackSize);
    }
  }

  /**
   * Classifies a whole sequence.
   * <p>
   * With a stack size of 1 each element is classified greedily in order; otherwise the
   * {@link #viterbi(List)} search is used. The feature lists passed in are never modified.
   *
   * @param features
   *          one feature list per element of the sequence.
   * @return one outcome per element of the sequence.
   * @throws CleartkProcessingException
   *           if the delegated classifier fails, or if the stack size is not 1 and an
   *           {@link UnsupportedOperationException} is raised during the search (reported as
   *           the delegated classifier not supporting {@code score}).
   */
  public List<OUTCOME_TYPE> classify(List<List<Feature>> features)
      throws CleartkProcessingException {
    if (stackSize == 1) {
      return classifyGreedily(features);
    }
    try {
      return viterbi(features);
    } catch (UnsupportedOperationException uoe) {
      throw CleartkProcessingException.unsupportedOperationSetParameter(
          uoe,
          delegatedClassifier,
          "score",
          PARAM_STACK_SIZE,
          1);
    }
  }

  /**
   * Labels the sequence left to right, committing to the delegated classifier's single best
   * outcome at each element.
   */
  private List<OUTCOME_TYPE> classifyGreedily(List<List<Feature>> features)
      throws CleartkProcessingException {
    // mirror viterbi(), which also tolerates a missing sequence
    if (features == null) {
      return Collections.emptyList();
    }

    List<Object> previousOutcomes = new ArrayList<Object>();
    List<OUTCOME_TYPE> returnValues = new ArrayList<OUTCOME_TYPE>(features.size());
    for (List<Feature> instanceFeatures : features) {
      // work on a copy so that the caller's feature lists are left untouched (the viterbi
      // branch copies as well); appending in place would also fail on unmodifiable lists
      List<Feature> augmentedFeatures = new ArrayList<Feature>(instanceFeatures);
      for (OutcomeFeatureExtractor outcomeFeatureExtractor : outcomeFeatureExtractors) {
        augmentedFeatures.addAll(outcomeFeatureExtractor.extractFeatures(previousOutcomes));
      }
      OUTCOME_TYPE outcome = delegatedClassifier.classify(augmentedFeatures);
      previousOutcomes.add(outcome);
      returnValues.add(outcome);
    }
    return returnValues;
  }

  /**
   * Searches the outcome lattice for the highest scoring sequence of outcomes.
   * <p>
   * For the first element, one path is started for each of the (at most stackSize) best scored
   * outcomes. For every later element, each surviving path is extended with its (at most
   * stackSize) best scored outcomes, and for each distinct outcome only the best scoring
   * extension is kept. The delegated classifier's {@code score} method is therefore called once
   * per surviving path per element; note that the number of surviving paths is the number of
   * distinct outcomes reached and is not itself truncated to the stack size. If this proves to
   * be too expensive, then consider using a smaller stack size.
   * <p>
   * Path scores are combined by addition or multiplication depending on
   * {@link #PARAM_ADD_SCORES}.
   *
   * @param featureLists
   *          a sequence-worth of features. Each List&lt;Feature&gt; in features should
   *          correspond to all of the features for a given element in a sequence to be
   *          classified.
   * @return a list of outcomes (classifications) - one classification for each member of the
   *         sequence; an empty list if the sequence is null or empty.
   * @throws CleartkProcessingException
   *           if the delegated classifier fails to score an element.
   * @see #PARAM_STACK_SIZE
   * @see OutcomeFeatureExtractor
   */
  public List<OUTCOME_TYPE> viterbi(List<List<Feature>> featureLists)
      throws CleartkProcessingException {

    if (featureLists == null || featureLists.size() == 0) {
      return Collections.emptyList();
    }

    // find the best paths through the outcome lattice
    Collection<Path> paths = null;
    for (List<Feature> features : featureLists) {

      // if this is the first instance, start new paths for each outcome
      if (paths == null) {
        paths = Lists.newArrayList();
        Map<OUTCOME_TYPE, Double> scoredOutcomes = this.getScoredOutcomes(features, null);
        for (OUTCOME_TYPE outcome : this.getTopOutcomes(scoredOutcomes)) {
          paths.add(new Path(outcome, scoredOutcomes.get(outcome), null));
        }
      }

      // for later instances, find the best previous path for each outcome
      else {
        Map<OUTCOME_TYPE, Path> maxPaths = Maps.newHashMap();
        for (Path path : paths) {
          Map<OUTCOME_TYPE, Double> scoredOutcomes = this.getScoredOutcomes(features, path);
          for (OUTCOME_TYPE outcome : this.getTopOutcomes(scoredOutcomes)) {
            double outcomeScore = scoredOutcomes.get(outcome);
            double score = this.addScores ? path.score + outcomeScore : path.score * outcomeScore;
            Path maxPath = maxPaths.get(outcome);
            if (maxPath == null || score > maxPath.score) {
              maxPaths.put(outcome, new Path(outcome, score, path));
            }
          }
        }
        paths = maxPaths.values();
      }
    }

    // take the maximum of the final paths
    return Collections.max(paths).outcomes;
  }

  /**
   * Not supported by this classifier.
   *
   * @throws UnsupportedOperationException
   *           always.
   */
  @Override
  public List<Map<OUTCOME_TYPE, Double>> score(List<List<Feature>> features)
      throws CleartkProcessingException {
    throw new UnsupportedOperationException();
  }

  /**
   * Resolves type arguments against the delegated classifier rather than this wrapper. A
   * request for {@link SequenceClassifier} is answered with the delegated classifier's
   * {@link Classifier} type arguments.
   *
   * @param genericType
   *          the generic type whose type arguments are requested.
   * @return the type arguments as reported by {@link ReflectionUtil#getTypeArguments}.
   */
  public Map<String, Type> getTypeArguments(Class<?> genericType) {
    if (genericType.equals(SequenceClassifier.class)) {
      genericType = Classifier.class;
    }
    return ReflectionUtil.getTypeArguments(genericType, this.delegatedClassifier);
  }

  /**
   * Scores the possible outcomes of one element, given the path that leads up to it.
   *
   * @param features
   *          the element's own features; copied, never modified.
   * @param path
   *          the path of preceding outcomes, or null for the first element (in which case no
   *          outcome features are added).
   * @return the non-empty map of scored outcomes returned by the delegated classifier.
   * @throws IllegalStateException
   *           if the delegated classifier returns no scored outcomes.
   */
  private Map<OUTCOME_TYPE, Double> getScoredOutcomes(List<Feature> features, Path path)
      throws CleartkProcessingException {

    // add the features from preceding outcomes
    features = Lists.newArrayList(features);
    if (path != null) {
      List<Object> previousOutcomes = new ArrayList<Object>(path.outcomes);
      for (OutcomeFeatureExtractor outcomeFeatureExtractor : this.outcomeFeatureExtractors) {
        features.addAll(outcomeFeatureExtractor.extractFeatures(previousOutcomes));
      }
    }

    // get the scored outcomes for this instance
    Map<OUTCOME_TYPE, Double> scoredOutcomes = this.delegatedClassifier.score(features);
    if (scoredOutcomes.isEmpty()) {
      throw new IllegalStateException("expected at least one scored outcome, found "
          + scoredOutcomes);
    }
    return scoredOutcomes;
  }

  /**
   * Selects the highest scored outcomes, at most stackSize of them, ordered by their score in
   * the given map.
   */
  private List<OUTCOME_TYPE> getTopOutcomes(Map<OUTCOME_TYPE, Double> scoredOutcomes) {
    // get just the outcomes that fit within the stack
    Ordering<OUTCOME_TYPE> ordering = Ordering.natural().onResultOf(
        Functions.forMap(scoredOutcomes));
    return ordering.greatestOf(scoredOutcomes.keySet(), this.stackSize);
  }

  /**
   * A candidate sequence of outcomes together with its accumulated score. Paths are ordered by
   * score only.
   */
  private class Path implements Comparable<Path> {
    /** The outcome assigned to the last element of this path. */
    public final OUTCOME_TYPE outcome;

    /** The accumulated score of the whole path. */
    public final double score;

    /** The path this one extends, or null if this path has a single element. */
    public final Path parent;

    /** All outcomes of this path in sequence order, ending with {@link #outcome}. */
    public final List<OUTCOME_TYPE> outcomes;

    public Path(OUTCOME_TYPE outcome, double score, Path parent) {
      this.outcome = outcome;
      this.score = score;
      this.parent = parent;
      // each path owns a full copy of its outcome sequence: the parent's outcomes plus its own
      this.outcomes = Lists.newArrayList();
      if (this.parent != null) {
        this.outcomes.addAll(this.parent.outcomes);
      }
      this.outcomes.add(this.outcome);
    }

    @Override
    public String toString() {
      ToStringHelper helper = Objects.toStringHelper(this);
      helper.add("outcome", this.outcome);
      helper.add("score", this.score);
      helper.add("parent", this.parent);
      return helper.toString();
    }

    @Override
    public int compareTo(Path that) {
      return Doubles.compare(this.score, that.score);
    }
  }
}