001/*
002 * Copyright (c) 2013, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.timeml.eval;
025
026import java.io.File;
027import java.io.IOException;
028import java.net.URI;
029import java.util.LinkedHashSet;
030import java.util.List;
031import java.util.Map;
032import java.util.Queue;
033import java.util.Set;
034
035import org.apache.uima.UimaContext;
036import org.apache.uima.analysis_engine.AnalysisEngine;
037import org.apache.uima.analysis_engine.AnalysisEngineDescription;
038import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
039import org.apache.uima.cas.CAS;
040import org.apache.uima.cas.CASException;
041import org.apache.uima.cas.Feature;
042import org.apache.uima.collection.CollectionReader;
043import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
044import org.apache.uima.fit.component.ViewCreatorAnnotator;
045import org.apache.uima.fit.descriptor.ConfigurationParameter;
046import org.apache.uima.fit.factory.AggregateBuilder;
047import org.apache.uima.fit.factory.AnalysisEngineFactory;
048import org.apache.uima.fit.pipeline.JCasIterator;
049import org.apache.uima.fit.pipeline.SimplePipeline;
050import org.apache.uima.fit.util.JCasUtil;
051import org.apache.uima.jcas.JCas;
052import org.apache.uima.jcas.cas.TOP;
053import org.apache.uima.resource.ResourceInitializationException;
054import org.apache.uima.util.CasCopier;
055import org.cleartk.corpus.timeml.PlainTextTlinkGoldAnnotator;
056import org.cleartk.corpus.timeml.TempEval2013Writer;
057import org.cleartk.corpus.timeml.TimeMlGoldAnnotator;
058import org.cleartk.eval.AnnotationStatistics;
059import org.cleartk.eval.Evaluation_ImplBase;
060import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
061import org.cleartk.opennlp.tools.ParserAnnotator;
062import org.cleartk.opennlp.tools.PosTaggerAnnotator;
063import org.cleartk.opennlp.tools.SentenceAnnotator;
064import org.cleartk.snowball.DefaultSnowballStemmer;
065import org.cleartk.timeml.event.EventAnnotator;
066import org.cleartk.timeml.event.EventAspectAnnotator;
067import org.cleartk.timeml.event.EventClassAnnotator;
068import org.cleartk.timeml.event.EventModalityAnnotator;
069import org.cleartk.timeml.event.EventPolarityAnnotator;
070import org.cleartk.timeml.event.EventTenseAnnotator;
071import org.cleartk.timeml.time.TimeAnnotator;
072import org.cleartk.timeml.time.TimeTypeAnnotator;
073import org.cleartk.timeml.tlink.TemporalLinkAnnotator_ImplBase;
074import org.cleartk.timeml.tlink.TemporalLinkEventToDocumentCreationTimeAnnotator;
075import org.cleartk.timeml.tlink.TemporalLinkEventToSameSentenceTimeAnnotator;
076import org.cleartk.timeml.tlink.TemporalLinkEventToSubordinatedEventAnnotator;
077import org.cleartk.timeml.type.Anchor;
078import org.cleartk.timeml.type.DocumentCreationTime;
079import org.cleartk.timeml.type.Event;
080import org.cleartk.timeml.type.TemporalLink;
081import org.cleartk.timeml.type.Text;
082import org.cleartk.timeml.type.Time;
083import org.cleartk.token.tokenizer.TokenAnnotator;
084import org.cleartk.util.ViewUriUtil;
085import org.cleartk.util.ae.UriToDocumentTextAnnotator;
086import org.cleartk.util.cr.UriCollectionReader;
087import org.jdom2.Document;
088import org.jdom2.Element;
089import org.jdom2.JDOMException;
090import org.jdom2.filter.Filters;
091import org.jdom2.input.SAXBuilder;
092import org.jdom2.output.XMLOutputter;
093
094import com.google.common.base.Function;
095import com.google.common.collect.ImmutableMultimap;
096import com.google.common.collect.ImmutableTable;
097import com.google.common.collect.LinkedHashMultimap;
098import com.google.common.collect.Lists;
099import com.google.common.collect.Maps;
100import com.google.common.collect.Multimap;
101import com.google.common.collect.Queues;
102import com.google.common.collect.Sets;
103import com.google.common.collect.Table;
104import com.lexicalscope.jewel.cli.CliFactory;
105import com.lexicalscope.jewel.cli.Option;
106
107/**
108 * Trains and evaluates event, time and temporal relation models on the TempEval 2013 data.
109 * 
110 * <br>
111 * Copyright (c) 2013, Regents of the University of Colorado <br>
112 * All rights reserved.
113 * 
114 * @author Steven Bethard
115 */
116public class TempEval2013Evaluation
117    extends
118    Evaluation_ImplBase<File, ImmutableTable<Model<?>, Model.Params, AnnotationStatistics<String>>> {
119
  /**
   * Command-line options for {@link #main(String[])}, parsed with JewelCli's
   * {@link CliFactory#parseArguments}.
   */
  interface Options {

    /** Directories whose (non-hidden) files are used as training documents. */
    @Option(longName = "train-dirs")
    List<File> getTrainDirectories();

    /**
     * Directories whose (non-hidden) files are used as test documents; null when not given, in
     * which case main falls back to a 2-fold cross-validation over the training files.
     */
    @Option(longName = "test-dirs", defaultToNull = true)
    List<File> getTestDirectories();

    /**
     * Directories handed to the evaluation as the inferred-TLINK directories; null when not
     * given, in which case the UseInferredTlinks step is left out of the pipelines.
     */
    @Option(longName = "inferred-tlinks", defaultToNull = true)
    List<File> getInferredTLinksDirectories();

    /** When set, the PlainTextTlinkGoldAnnotator is added to the gold-annotation pipelines. */
    @Option(longName = "verb-clause-tlinks")
    boolean getVerbClauseTLinks();

    /** When set, only models whose name starts with "tlink" are run with their best parameters. */
    @Option(longName = "relations-only")
    boolean getRelationsOnly();

    /**
     * Name of a single model to tune; null when not given. When present, the model's
     * prerequisites are run with their best parameters and the model itself is run once for
     * every entry of its parameter search space.
     */
    @Option(longName = "tune", defaultToNull = true)
    String getNameOfModelToTune();

    /**
     * When set, main only trains the models into Model.DEFAULT_DIRECTORY and rejects any test
     * files with an IllegalArgumentException.
     */
    @Option(longName = "train-only")
    boolean getTrainOnly();
  }
143
144  public static void main(String[] args) throws Exception {
145    Options options = CliFactory.parseArguments(Options.class, args);
146
147    List<File> trainFiles = listAllFiles(options.getTrainDirectories());
148    List<File> testFiles = listAllFiles(options.getTestDirectories());
149
150    // map names to models
151    List<Model<?>> allModels = Lists.<Model<?>> newArrayList(
152        TIME_EXTENT_MODEL,
153        TIME_TYPE_MODEL,
154        EVENT_EXTENT_MODEL,
155        EVENT_ASPECT_MODEL,
156        EVENT_CLASS_MODEL,
157        EVENT_MODALITY_MODEL,
158        EVENT_POLARITY_MODEL,
159        EVENT_TENSE_MODEL,
160        TLINK_EVENT_DOCTIME_MODEL,
161        TLINK_EVENT_SENTTIME_MODEL,
162        TLINK_EVENT_SUBORDEVENT_MODEL);
163    Map<String, Model<?>> nameToModel = Maps.newHashMap();
164    for (Model<?> model : allModels) {
165      nameToModel.put(model.name, model);
166    }
167
168    // determine which parameters each model should be trained with
169    ImmutableMultimap.Builder<Model<?>, Model.Params> modelsBuilder = ImmutableMultimap.builder();
170    String nameOfModelToTune = options.getNameOfModelToTune();
171    if (nameOfModelToTune == null) {
172      for (Model<?> model : allModels) {
173        if (!options.getRelationsOnly() || model.name.startsWith("tlink")) {
174          modelsBuilder.put(model, model.bestParams);
175        }
176      }
177    } else {
178      Model<?> modelToTune = nameToModel.get(nameOfModelToTune);
179      if (modelToTune == null) {
180        throw new IllegalArgumentException("No such model: " + nameOfModelToTune);
181      }
182      for (Model<?> model : getSortedPrerequisites(modelToTune)) {
183        if (!options.getRelationsOnly() || model.name.startsWith("tlink")) {
184          modelsBuilder.put(model, model.bestParams);
185        }
186      }
187      for (Model.Params params : modelToTune.paramsToSearch) {
188        modelsBuilder.put(modelToTune, params);
189      }
190    }
191    ImmutableMultimap<Model<?>, Model.Params> models = modelsBuilder.build();
192
193    // create the evaluation manager
194    File evalDir = new File("target/tempeval2013");
195    TempEval2013Evaluation evaluation = new TempEval2013Evaluation(
196        evalDir,
197        models,
198        options.getInferredTLinksDirectories(),
199        options.getVerbClauseTLinks(),
200        options.getRelationsOnly());
201
202    // just train a model
203    if (options.getTrainOnly()) {
204      if (!testFiles.isEmpty()) {
205        throw new IllegalArgumentException("Cannot specify test files when only training");
206      }
207      evaluation.train(evaluation.getCollectionReader(trainFiles), Model.DEFAULT_DIRECTORY);
208      for (Model<?> model : models.keySet()) {
209        for (Model.Params params : models.get(model)) {
210          model.cleanTrainingFiles(Model.DEFAULT_DIRECTORY, params);
211        }
212      }
213    } else {
214
215      // run a simple train-and-test
216      ImmutableTable<Model<?>, Model.Params, AnnotationStatistics<String>> modelStats;
217      if (!testFiles.isEmpty()) {
218        modelStats = evaluation.trainAndTest(trainFiles, testFiles);
219      }
220  
221      // run a cross-validation
222      else {
223        List<ImmutableTable<Model<?>, Model.Params, AnnotationStatistics<String>>> foldStats;
224        foldStats = evaluation.crossValidation(trainFiles, 2);
225  
226        // prepare a table of stats for all models and parameters
227        ImmutableTable.Builder<Model<?>, Model.Params, AnnotationStatistics<String>> modelStatsBuilder = ImmutableTable.builder();
228        for (Model<?> model : models.keySet()) {
229          for (Model.Params params : models.get(model)) {
230            modelStatsBuilder.put(model, params, new AnnotationStatistics<String>());
231          }
232        }
233        modelStats = modelStatsBuilder.build();
234  
235        // combine all fold stats into a single overall stats
236        for (Table<Model<?>, Model.Params, AnnotationStatistics<String>> foldTable : foldStats) {
237          for (Table.Cell<Model<?>, Model.Params, AnnotationStatistics<String>> cell : foldTable.cellSet()) {
238            modelStats.get(cell.getRowKey(), cell.getColumnKey()).addAll(cell.getValue());
239          }
240        }
241      }
242  
243      // print out all model performance
244      for (Model<?> model : models.keySet()) {
245        for (Model.Params params : modelStats.row(model).keySet()) {
246          System.err.printf("== %s %s ==\n", model.name, params);
247          System.err.println(modelStats.get(model, params));
248        }
249      }
250    }
251  }
252
253  private static List<File> listAllFiles(List<File> directories) {
254    List<File> files = Lists.newArrayList();
255    if (directories != null) {
256      for (File dir : directories) {
257        for (File file : dir.listFiles()) {
258          if (!file.getName().startsWith(".") && !file.isHidden()) {
259            files.add(file);
260          }
261        }
262      }
263    }
264    return files;
265  }
266  
267  private static Set<Model<?>> getPrerequisites(Model<?> model) {
268    Set<Model<?>> prereqs = Sets.newLinkedHashSet();
269    for (Model<?> prereq : model.prerequisites) {
270      prereqs.add(prereq);
271      prereqs.addAll(getPrerequisites(prereq));
272    }
273    return prereqs;
274  }
275
276  private static LinkedHashSet<Model<?>> getSortedPrerequisites(Model<?> model) {
277    Queue<Model<?>> todo = Queues.newArrayDeque();
278    Multimap<Model<?>, Model<?>> following = LinkedHashMultimap.create();
279    for (Model<?> prereq : getPrerequisites(model)) {
280      if (prereq.prerequisites.isEmpty()) {
281        todo.add(prereq);
282      } else {
283        for (Model<?> preprereq : prereq.prerequisites) {
284          following.put(preprereq, prereq);
285        }
286      }
287    }
288    LinkedHashSet<Model<?>> models = Sets.newLinkedHashSet();
289    while (!todo.isEmpty()) {
290      Model<?> next = todo.iterator().next();
291      todo.remove(next);
292      models.add(next);
293      for (Model<?> prereq : following.removeAll(next)) {
294        if (!following.containsKey(prereq)) {
295          todo.add(prereq);
296        }
297      }
298    }
299    return models;
300  }
301
302  private static Function<TemporalLink, List<Integer>> TEMPORAL_LINK_TO_SPANS = new Function<TemporalLink, List<Integer>>() {
303    @Override
304    public List<Integer> apply(TemporalLink temporalLink) {
305      // order source and target indexes, left-to-right
306      Anchor source = temporalLink.getSource();
307      Anchor target = temporalLink.getTarget();
308      return source.getBegin() < target.getBegin()
309          ? Lists.newArrayList(source.getBegin(), source.getEnd(), target.getBegin(), target.getEnd())
310          : Lists.newArrayList(target.getBegin(), target.getEnd(), source.getBegin(), source.getEnd());
311    }
312  };
313  
314  private static Function<TemporalLink, String> TEMPORAL_LINK_TO_RELATION = new Function<TemporalLink, String>() {
315    @Override
316    public String apply(TemporalLink temporalLink) {
317      // match relation with left-to-right ordering of indexes
318      Anchor source = temporalLink.getSource();
319      Anchor target = temporalLink.getTarget();
320      return source.getBegin() < target.getBegin()
321          ? temporalLink.getRelationType()
322          : TemporalLinkAnnotator_ImplBase.REVERSE_RELATION.get(temporalLink.getRelationType());
323    }
324  };
325
326  private static List<Model.Params> SEQUENCE_CLASSIFIER_PARAM_SEARCH_SPACE = Lists.newArrayList(
327      // L2-regularized L2-loss support vector classification (dual)
328      new Model.Params(LibLinearStringOutcomeDataWriter.class, 1, "-c", "0.1", "-s", "1"),
329      new Model.Params(LibLinearStringOutcomeDataWriter.class, 1, "-c", "0.5", "-s", "1"),
330      new Model.Params(LibLinearStringOutcomeDataWriter.class, 1, "-c", "1", "-s", "1"),
331      new Model.Params(LibLinearStringOutcomeDataWriter.class, 1, "-c", "5", "-s", "1"),
332      new Model.Params(LibLinearStringOutcomeDataWriter.class, 1, "-c", "10", "-s", "1"),
333      new Model.Params(LibLinearStringOutcomeDataWriter.class, 1, "-c", "50", "-s", "1"),
334      new Model.Params(LibLinearStringOutcomeDataWriter.class, 2, "-c", "0.1", "-s", "1"),
335      new Model.Params(LibLinearStringOutcomeDataWriter.class, 2, "-c", "0.5", "-s", "1"),
336      new Model.Params(LibLinearStringOutcomeDataWriter.class, 2, "-c", "1", "-s", "1"),
337      new Model.Params(LibLinearStringOutcomeDataWriter.class, 2, "-c", "5", "-s", "1"),
338      new Model.Params(LibLinearStringOutcomeDataWriter.class, 2, "-c", "10", "-s", "1"),
339      new Model.Params(LibLinearStringOutcomeDataWriter.class, 2, "-c", "50", "-s", "1"),
340      new Model.Params(LibLinearStringOutcomeDataWriter.class, 3, "-c", "0.1", "-s", "1"),
341      new Model.Params(LibLinearStringOutcomeDataWriter.class, 3, "-c", "0.5", "-s", "1"),
342      new Model.Params(LibLinearStringOutcomeDataWriter.class, 3, "-c", "1", "-s", "1"),
343      new Model.Params(LibLinearStringOutcomeDataWriter.class, 3, "-c", "5", "-s", "1"),
344      new Model.Params(LibLinearStringOutcomeDataWriter.class, 3, "-c", "10", "-s", "1"),
345      new Model.Params(LibLinearStringOutcomeDataWriter.class, 3, "-c", "50", "-s", "1"));
346//      // default is --iterations 500 --gaussian-variance 10
347//      new Model.Params(MalletCRFStringOutcomeDataWriter.class),
348//      new Model.Params(MalletCRFStringOutcomeDataWriter.class, "--forbidden", "O,I"),
349//      new Model.Params(MalletCRFStringOutcomeDataWriter.class, "--iterations", "100"),
350//      new Model.Params(MalletCRFStringOutcomeDataWriter.class, "--iterations", "1000"),
351//      new Model.Params(MalletCRFStringOutcomeDataWriter.class, "--gaussian-variance", "1"),
352//      new Model.Params(MalletCRFStringOutcomeDataWriter.class, "--gaussian-variance", "100"));
353
354//  private static final String priorFlag = "--gaussianPriorVariance";
355
356  private static List<Model.Params> CLASSIFIER_PARAM_SEARCH_SPACE = Lists.newArrayList(
357//      // default is --gaussianPriorVariance 1
358//      new Model.Params(MalletStringOutcomeDataWriter.class, "MaxEnt"),
359//      new Model.Params(MalletStringOutcomeDataWriter.class, "MaxEnt", priorFlag, "0.1"),
360//      new Model.Params(MalletStringOutcomeDataWriter.class, "MaxEnt", priorFlag, "10"),
361//      // default is [iterations cutoff] 100 5
362//      new Model.Params(MaxentStringOutcomeDataWriter.class),
363//      new Model.Params(MaxentStringOutcomeDataWriter.class, "100", "10"),
364//      new Model.Params(MaxentStringOutcomeDataWriter.class, "500", "5"),
365      // L2-regularized logistic regression (primal)
366      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.1", "-s", "0"),
367      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.5", "-s", "0"),
368      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "1", "-s", "0"),
369      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "5", "-s", "0"),
370      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "10", "-s", "0"),
371      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "50", "-s", "0"),
372      // L2-regularized L2-loss support vector classification (dual)
373      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.1", "-s", "1"),
374      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.5", "-s", "1"),
375      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "1", "-s", "1"),
376      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "5", "-s", "1"),
377      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "10", "-s", "1"),
378      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "50", "-s", "1"));
379
  /**
   * Model named "time-extent", backed by {@link TimeAnnotator} and evaluated over {@link Time}
   * annotations. It declares no prerequisite models and is built with the span function
   * {@code annotationToSpan()} and the outcome function {@code annotationToNull()}.
   * NOTE(review): the roles of the single Model.Params argument (default parameters), the
   * search-space list and the trailing null (no feature name) are inferred from their position;
   * confirm against the Model constructor.
   */
  private static final Model<Time> TIME_EXTENT_MODEL = new Model<Time>(
      "time-extent",
      Lists.<Model<?>> newArrayList(),
      TimeAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, 1, "-c", "0.1", "-s", "1"),
      SEQUENCE_CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Time.class,
      AnnotationStatistics.<Time> annotationToSpan(),
      AnnotationStatistics.<Time, String> annotationToNull(),
      null);
392
  /**
   * Model named "time-type", backed by {@link TimeTypeAnnotator} and evaluated over {@link Time}
   * annotations, with {@link #TIME_EXTENT_MODEL} as its only prerequisite. Its outcome function
   * is built from the "timeType" feature name, which is also passed as the last argument.
   * NOTE(review): argument roles are inferred from position; confirm against the Model
   * constructor.
   */
  private static final Model<Time> TIME_TYPE_MODEL = new Model<Time>(
      "time-type",
      Lists.<Model<?>> newArrayList(TIME_EXTENT_MODEL),
      TimeTypeAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "5", "-s", "0"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Time.class,
      AnnotationStatistics.<Time> annotationToSpan(),
      AnnotationStatistics.<Time> annotationToFeatureValue("timeType"),
      "timeType");
405
  /**
   * Model named "event-extent", backed by {@link EventAnnotator} and evaluated over
   * {@link Event} annotations. It declares no prerequisite models and is built with the span
   * function {@code annotationToSpan()} and the outcome function {@code annotationToNull()}.
   * NOTE(review): argument roles (default parameters, search space, trailing null feature name)
   * are inferred from position; confirm against the Model constructor.
   */
  private static final Model<Event> EVENT_EXTENT_MODEL = new Model<Event>(
      "event-extent",
      Lists.<Model<?>> newArrayList(),
      EventAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.1", "-s", "1"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Event.class,
      AnnotationStatistics.<Event> annotationToSpan(),
      AnnotationStatistics.<Event, String> annotationToNull(),
      null);
418
  /**
   * Model named "event-aspect", backed by {@link EventAspectAnnotator} and evaluated over
   * {@link Event} annotations, with {@link #EVENT_EXTENT_MODEL} as its only prerequisite. Its
   * outcome function is built from the "aspect" feature name, which is also passed as the last
   * argument.
   * NOTE(review): argument roles are inferred from position; confirm against the Model
   * constructor.
   */
  private static final Model<Event> EVENT_ASPECT_MODEL = new Model<Event>(
      "event-aspect",
      Lists.<Model<?>> newArrayList(EVENT_EXTENT_MODEL),
      EventAspectAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "5", "-s", "0"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Event.class,
      AnnotationStatistics.<Event> annotationToSpan(),
      AnnotationStatistics.<Event> annotationToFeatureValue("aspect"),
      "aspect");
431
  /**
   * Model named "event-class", backed by {@link EventClassAnnotator} and evaluated over
   * {@link Event} annotations, with {@link #EVENT_EXTENT_MODEL} as its only prerequisite. Its
   * outcome function is built from the "eventClass" feature name, which is also passed as the
   * last argument.
   * NOTE(review): argument roles are inferred from position; confirm against the Model
   * constructor.
   */
  private static final Model<Event> EVENT_CLASS_MODEL = new Model<Event>(
      "event-class",
      Lists.<Model<?>> newArrayList(EVENT_EXTENT_MODEL),
      EventClassAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "5", "-s", "0"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Event.class,
      AnnotationStatistics.<Event> annotationToSpan(),
      AnnotationStatistics.<Event> annotationToFeatureValue("eventClass"),
      "eventClass");
444
  /**
   * Model named "event-modality", backed by {@link EventModalityAnnotator} and evaluated over
   * {@link Event} annotations, with {@link #EVENT_EXTENT_MODEL} as its only prerequisite. Its
   * outcome function is built from the "modality" feature name, which is also passed as the
   * last argument.
   * NOTE(review): argument roles are inferred from position; confirm against the Model
   * constructor.
   */
  private static final Model<Event> EVENT_MODALITY_MODEL = new Model<Event>(
      "event-modality",
      Lists.<Model<?>> newArrayList(EVENT_EXTENT_MODEL),
      EventModalityAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "1", "-s", "1"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Event.class,
      AnnotationStatistics.<Event> annotationToSpan(),
      AnnotationStatistics.<Event> annotationToFeatureValue("modality"),
      "modality");
457
  /**
   * Model named "event-polarity", backed by {@link EventPolarityAnnotator} and evaluated over
   * {@link Event} annotations, with {@link #EVENT_EXTENT_MODEL} as its only prerequisite. Its
   * outcome function is built from the "polarity" feature name, which is also passed as the
   * last argument.
   * NOTE(review): argument roles are inferred from position; confirm against the Model
   * constructor.
   */
  private static final Model<Event> EVENT_POLARITY_MODEL = new Model<Event>(
      "event-polarity",
      Lists.<Model<?>> newArrayList(EVENT_EXTENT_MODEL),
      EventPolarityAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "1", "-s", "1"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Event.class,
      AnnotationStatistics.<Event> annotationToSpan(),
      AnnotationStatistics.<Event> annotationToFeatureValue("polarity"),
      "polarity");
470
  /**
   * Model named "event-tense", backed by {@link EventTenseAnnotator} and evaluated over
   * {@link Event} annotations, with {@link #EVENT_EXTENT_MODEL} as its only prerequisite. Its
   * outcome function is built from the "tense" feature name, which is also passed as the last
   * argument.
   * NOTE(review): argument roles are inferred from position; confirm against the Model
   * constructor.
   */
  private static final Model<Event> EVENT_TENSE_MODEL = new Model<Event>(
      "event-tense",
      Lists.<Model<?>> newArrayList(EVENT_EXTENT_MODEL),
      EventTenseAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.5", "-s", "1"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.NORMAL,
      Model.LoggingType.NONE,
      Event.class,
      AnnotationStatistics.<Event> annotationToSpan(),
      AnnotationStatistics.<Event> annotationToFeatureValue("tense"),
      "tense");
483
  /**
   * Model named "tlink-event-doctime", backed by
   * {@link TemporalLinkEventToDocumentCreationTimeAnnotator} and evaluated over
   * {@link TemporalLink} annotations with {@code EvaluationType.INTERSECTED_SPANS}. It lists the
   * event extent model and the five event attribute models as prerequisites, and compares links
   * through {@link #TEMPORAL_LINK_TO_SPANS} and {@link #TEMPORAL_LINK_TO_RELATION}.
   * NOTE(review): argument roles (default parameters, search space, trailing null feature name)
   * are inferred from position; confirm against the Model constructor.
   */
  private static final Model<TemporalLink> TLINK_EVENT_DOCTIME_MODEL = new Model<TemporalLink>(
      "tlink-event-doctime",
      Lists.<Model<?>> newArrayList(
          EVENT_EXTENT_MODEL,
          EVENT_ASPECT_MODEL,
          EVENT_CLASS_MODEL,
          EVENT_MODALITY_MODEL,
          EVENT_POLARITY_MODEL,
          EVENT_TENSE_MODEL),
      TemporalLinkEventToDocumentCreationTimeAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "50", "-s", "1"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.INTERSECTED_SPANS,
      Model.LoggingType.NONE,
      TemporalLink.class,
      TEMPORAL_LINK_TO_SPANS,
      TEMPORAL_LINK_TO_RELATION,
      null);
502
  /**
   * Model named "tlink-event-senttime", backed by
   * {@link TemporalLinkEventToSameSentenceTimeAnnotator} and evaluated over {@link TemporalLink}
   * annotations with {@code EvaluationType.INTERSECTED_SPANS}. It lists both time models, the
   * event extent model and the five event attribute models as prerequisites, and compares links
   * through {@link #TEMPORAL_LINK_TO_SPANS} and {@link #TEMPORAL_LINK_TO_RELATION}.
   * NOTE(review): argument roles (default parameters, search space, trailing null feature name)
   * are inferred from position; confirm against the Model constructor.
   */
  private static final Model<TemporalLink> TLINK_EVENT_SENTTIME_MODEL = new Model<TemporalLink>(
      "tlink-event-senttime",
      Lists.<Model<?>> newArrayList(
          TIME_EXTENT_MODEL,
          TIME_TYPE_MODEL,
          EVENT_EXTENT_MODEL,
          EVENT_ASPECT_MODEL,
          EVENT_CLASS_MODEL,
          EVENT_MODALITY_MODEL,
          EVENT_POLARITY_MODEL,
          EVENT_TENSE_MODEL),
      TemporalLinkEventToSameSentenceTimeAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.5", "-s", "1"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.INTERSECTED_SPANS,
      Model.LoggingType.NONE,
      TemporalLink.class,
      TEMPORAL_LINK_TO_SPANS,
      TEMPORAL_LINK_TO_RELATION,
      null);
523
  /**
   * Model named "tlink-event-subordevent", backed by
   * {@link TemporalLinkEventToSubordinatedEventAnnotator} and evaluated over
   * {@link TemporalLink} annotations with {@code EvaluationType.INTERSECTED_SPANS}. It lists the
   * event extent model and the five event attribute models as prerequisites, and compares links
   * through {@link #TEMPORAL_LINK_TO_SPANS} and {@link #TEMPORAL_LINK_TO_RELATION}.
   * NOTE(review): argument roles (default parameters, search space, trailing null feature name)
   * are inferred from position; confirm against the Model constructor.
   */
  private static final Model<TemporalLink> TLINK_EVENT_SUBORDEVENT_MODEL = new Model<TemporalLink>(
      "tlink-event-subordevent",
      Lists.<Model<?>> newArrayList(
          EVENT_EXTENT_MODEL,
          EVENT_ASPECT_MODEL,
          EVENT_CLASS_MODEL,
          EVENT_MODALITY_MODEL,
          EVENT_POLARITY_MODEL,
          EVENT_TENSE_MODEL),
      TemporalLinkEventToSubordinatedEventAnnotator.class,
      new Model.Params(LibLinearStringOutcomeDataWriter.class, "-c", "0.1", "-s", "1"),
      CLASSIFIER_PARAM_SEARCH_SPACE,
      Model.EvaluationType.INTERSECTED_SPANS,
      Model.LoggingType.NONE,
      TemporalLink.class,
      TEMPORAL_LINK_TO_SPANS,
      TEMPORAL_LINK_TO_RELATION,
      null);
542
  // the model/parameters combinations that this evaluation writes data for, trains and tests
  private ImmutableMultimap<Model<?>, Model.Params> models;

  // directories passed to the UseInferredTlinks step; when null, that step is not added
  private List<File> inferredTLinksDirectories;

  // when true, the PlainTextTlinkGoldAnnotator is added to the gold-annotation pipelines
  private boolean useVerbClauseTlinks;

  // when true, test() adds the CopyEventsAndTimes step, reading from the gold view
  private boolean relationsOnly;
550
551  public TempEval2013Evaluation(
552      File baseDirectory,
553      ImmutableMultimap<Model<?>, Model.Params> models,
554      List<File> inferredTLinksDirectories,
555      boolean useVerbClauseTlinks,
556      boolean relationsOnly) {
557    super(baseDirectory);
558    this.models = models;
559    this.inferredTLinksDirectories = inferredTLinksDirectories;
560    this.useVerbClauseTlinks = useVerbClauseTlinks;
561    this.relationsOnly = relationsOnly;
562  }
563
564  @Override
565  protected CollectionReader getCollectionReader(List<File> files) throws Exception {
566    return UriCollectionReader.getCollectionReaderFromFiles(files);
567  }
568  
569  @Override
570  public void train(CollectionReader reader, File directory) throws Exception {
571    AggregateBuilder builder = new AggregateBuilder();
572
573    // read the manual TimeML annotations into the CAS
574    builder.add(AnalysisEngineFactory.createEngineDescription(
575        ViewCreatorAnnotator.class,
576        ViewCreatorAnnotator.PARAM_VIEW_NAME,
577        TimeMlGoldAnnotator.TIMEML_VIEW_NAME));
578    builder.add(
579        UriToDocumentTextAnnotator.getDescription(),
580        CAS.NAME_DEFAULT_SOFA,
581        TimeMlGoldAnnotator.TIMEML_VIEW_NAME);
582    builder.add(TimeMlGoldAnnotator.getDescription());
583    if (this.inferredTLinksDirectories != null) {
584      builder.add(AnalysisEngineFactory.createEngineDescription(
585          UseInferredTlinks.class,
586          UseInferredTlinks.PARAM_INFERRED_TLINKS_DIRECTORIES,
587          this.inferredTLinksDirectories));
588    }
589    if (this.useVerbClauseTlinks) {
590      builder.add(PlainTextTlinkGoldAnnotator.getDescription());
591    }
592    builder.add(AnalysisEngineFactory.createEngineDescription(FixTimeML.class));
593
594    // only add sentences and other annotations under <TEXT>
595    builder.add(AnalysisEngineFactory.createEngineDescription(
596        SentenceAnnotator.class,
597        SentenceAnnotator.PARAM_SENTENCE_MODEL_PATH,
598        "/models/en-sent.bin",
599        SentenceAnnotator.PARAM_WINDOW_CLASS_NAMES,
600        new Class<?>[] { Text.class }));
601    builder.add(TokenAnnotator.getDescription());
602    builder.add(PosTaggerAnnotator.getDescription());
603    builder.add(DefaultSnowballStemmer.getDescription("English"));
604    builder.add(ParserAnnotator.getDescription());
605
606    // add a data write for each model and its various parameters
607    for (Model<?> model : this.models.keySet()) {
608      for (Model.Params params : this.models.get(model)) {
609        builder.add(model.getWriterDescription(directory, params));
610      }
611    }
612
613    // run the pipeline
614    SimplePipeline.runPipeline(reader, builder.createAggregate());
615
616    // train each model with each of its various parameters
617    for (Model<?> model : this.models.keySet()) {
618      for (Model.Params params : this.models.get(model)) {
619        System.err.printf("Training: %s %s\n", model.name, params);
620        model.train(directory, params);
621      }
622    }
623  }
624
625  @Override
626  protected ImmutableTable<Model<?>, Model.Params, AnnotationStatistics<String>> test(
627      CollectionReader reader,
628      File directory) throws Exception {
629    String goldViewName = "GoldView";
630    AggregateBuilder preprocess = new AggregateBuilder();
631
632    // read the manual TimeML annotations into the gold view
633    preprocess.add(AnalysisEngineFactory.createEngineDescription(
634        ViewCreatorAnnotator.class,
635        ViewCreatorAnnotator.PARAM_VIEW_NAME,
636        TimeMlGoldAnnotator.TIMEML_VIEW_NAME));
637    preprocess.add(
638        UriToDocumentTextAnnotator.getDescription(),
639        CAS.NAME_DEFAULT_SOFA,
640        TimeMlGoldAnnotator.TIMEML_VIEW_NAME);
641    preprocess.add(AnalysisEngineFactory.createEngineDescription(
642        ViewCreatorAnnotator.class,
643        ViewCreatorAnnotator.PARAM_VIEW_NAME,
644        goldViewName));
645    preprocess.add(TimeMlGoldAnnotator.getDescription(), CAS.NAME_DEFAULT_SOFA, goldViewName);
646    if (this.inferredTLinksDirectories != null) {
647      preprocess.add(AnalysisEngineFactory.createEngineDescription(
648          UseInferredTlinks.class,
649          UseInferredTlinks.PARAM_INFERRED_TLINKS_DIRECTORIES,
650          this.inferredTLinksDirectories), CAS.NAME_DEFAULT_SOFA, goldViewName);
651    }
652    if (this.useVerbClauseTlinks) {
653      preprocess.add(
654          PlainTextTlinkGoldAnnotator.getDescription(),
655          CAS.NAME_DEFAULT_SOFA,
656          goldViewName);
657    }
658    preprocess.add(
659        AnalysisEngineFactory.createEngineDescription(FixTimeML.class),
660        CAS.NAME_DEFAULT_SOFA,
661        goldViewName);
662    preprocess.add(AnalysisEngineFactory.createEngineDescription(
663        CopyTextAndDocumentCreationTime.class,
664        CopyTextAndDocumentCreationTime.PARAM_SOURCE_VIEW,
665        goldViewName));
666    if (this.relationsOnly) {
667      preprocess.add(AnalysisEngineFactory.createEngineDescription(
668          CopyEventsAndTimes.class,
669          CopyEventsAndTimes.PARAM_SOURCE_VIEW,
670          goldViewName));
671    } 
672
673    // only add sentences and other annotations under <TEXT>
674    preprocess.add(AnalysisEngineFactory.createEngineDescription(
675        SentenceAnnotator.class,
676        SentenceAnnotator.PARAM_SENTENCE_MODEL_PATH,
677        "/models/en-sent.bin",
678        SentenceAnnotator.PARAM_WINDOW_CLASS_NAMES,
679        new Class<?>[] { Text.class }));
680    preprocess.add(TokenAnnotator.getDescription());
681    preprocess.add(PosTaggerAnnotator.getDescription());
682    preprocess.add(DefaultSnowballStemmer.getDescription("English"));
683    preprocess.add(ParserAnnotator.getDescription());
684    AnalysisEngine preprocessEngine = preprocess.createAggregate();
685
686    // finalize TLINK ids and write out TimeML files
687    AggregateBuilder postprocess = new AggregateBuilder();
688    postprocess.add(AnalysisEngineFactory.createEngineDescription(ShrinkTimesContainingEvents.class));
689    postprocess.add(AnalysisEngineFactory.createEngineDescription(SetTemporalLinkIDs.class));
690    postprocess.add(TempEval2013Writer.getDescription(new File(this.baseDirectory, "timeml")));
691    AnalysisEngine postprocessEngine = postprocess.createAggregate();
692
693    // create one AnalysisEngine and one AnnotationStatistics for each model/parameters combination
694    ImmutableTable.Builder<Model<?>, Model.Params, AnalysisEngine> enginesBuilder = ImmutableTable.builder();
695    ImmutableTable.Builder<Model<?>, Model.Params, AnnotationStatistics<String>> statsBuilder = ImmutableTable.builder();
696    for (Model<?> model : this.models.keySet()) {
697      for (Model.Params params : this.models.get(model)) {
698        AnalysisEngineDescription desc = model.getAnnotatorDescription(directory, params);
699        enginesBuilder.put(model, params, AnalysisEngineFactory.createEngine(desc));
700        statsBuilder.put(model, params, new AnnotationStatistics<String>());
701      }
702    }
703    ImmutableTable<Model<?>, Model.Params, AnalysisEngine> engines = enginesBuilder.build();
704    ImmutableTable<Model<?>, Model.Params, AnnotationStatistics<String>> stats = statsBuilder.build();
705
706    // evaluate each CAS in the test data
707    JCasIterator iter = new JCasIterator(reader, preprocessEngine);
708    while (iter.hasNext()) {
709      JCas jCas = iter.next();
710
711      // evaluate each model/parameters combination
712      JCas goldView = jCas.getView(goldViewName);
713      JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
714      for (Model<?> model : engines.rowKeySet()) {
715        // remove any annotations from previous models that would interfere with the evaluation
716        Map<? extends TOP, String> annotations = model.removeModelAnnotations(jCas);
717        
718        // apply and evaluate the model with each set of parameters
719        for (Map.Entry<Model.Params, AnalysisEngine> entry : engines.row(model).entrySet()) {
720          Model.Params params = entry.getKey();
721          AnalysisEngine engine = entry.getValue();
722  
723          // remove any annotations from this model with other parameter settings
724          model.removeModelAnnotations(jCas);
725          
726          // process and evaluate
727          engine.process(jCas);
728          model.evaluate(goldView, systemView, stats.get(model, params));
729        }
730        
731        // restore any annotations from previous models
732        model.restoreModelAnnotations(jCas, annotations);
733      }
734
735      postprocessEngine.process(jCas);
736    }
737    return stats;
738  }
739
740  public static class CopyTextAndDocumentCreationTime extends JCasAnnotator_ImplBase {
741
742    public static final String PARAM_SOURCE_VIEW = "SourceView";
743
744    @ConfigurationParameter(name = PARAM_SOURCE_VIEW)
745    private String sourceViewName;
746
747    @Override
748    public void process(JCas jCas) throws AnalysisEngineProcessException {
749      JCas sourceView;
750      try {
751        sourceView = jCas.getView(this.sourceViewName);
752      } catch (CASException e) {
753        throw new AnalysisEngineProcessException(e);
754      }
755      CasCopier copier = new CasCopier(sourceView.getCas(), jCas.getCas());
756      Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
757
758      // copy document text
759      jCas.setDocumentText(sourceView.getDocumentText());
760
761      // copy text annotation
762      Text sourceText = JCasUtil.selectSingle(sourceView, Text.class);
763      Text text = (Text) copier.copyFs(sourceText);
764      text.setFeatureValue(sofaFeature, jCas.getSofa());
765      text.addToIndexes();
766
767      // copy document creation time
768      DocumentCreationTime sourceTime = JCasUtil.selectSingle(
769          sourceView,
770          DocumentCreationTime.class);
771      DocumentCreationTime time = (DocumentCreationTime) copier.copyFs(sourceTime);
772      time.setFeatureValue(sofaFeature, jCas.getSofa());
773      time.addToIndexes();
774    }
775  }
776
777  public static class CopyEventsAndTimes extends JCasAnnotator_ImplBase {
778
779    public static final String PARAM_SOURCE_VIEW = "SourceView";
780
781    @ConfigurationParameter(name = PARAM_SOURCE_VIEW)
782    private String sourceViewName;
783
784    @Override
785    public void process(JCas jCas) throws AnalysisEngineProcessException {
786      JCas sourceView;
787      try {
788        sourceView = jCas.getView(this.sourceViewName);
789      } catch (CASException e) {
790        throw new AnalysisEngineProcessException(e);
791      }
792      CasCopier copier = new CasCopier(sourceView.getCas(), jCas.getCas());
793      Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
794      for (Event sourceEvent : JCasUtil.select(sourceView, Event.class)) {
795        Event event = (Event) copier.copyFs(sourceEvent);
796        event.setFeatureValue(sofaFeature, jCas.getSofa());
797        if (event.getEventInstanceID() ==  null) {
798          event.setEventInstanceID(event.getId().replaceAll("^e", "ei"));
799        }
800        event.addToIndexes();
801      }
802      for (Time sourceTime : JCasUtil.select(sourceView, Time.class)) {
803        if (!(sourceTime instanceof DocumentCreationTime)) {
804          Time time = (Time) copier.copyFs(sourceTime);
805          time.setFeatureValue(sofaFeature, jCas.getSofa());
806          time.addToIndexes();
807        }
808      }
809    }
810  }
811
812  public static class FixTimeML extends JCasAnnotator_ImplBase {
813
814    private static Set<String> IS_SIMULTANEOUS = Sets.newHashSet("DURING", "DURING_INV", "IDENTITY");
815
816    @Override
817    public void process(JCas jCas) throws AnalysisEngineProcessException {
818      // add missing event attributes
819      for (Event event : JCasUtil.select(jCas, Event.class)) {
820        if (event.getAspect() == null) {
821          event.setAspect("NONE");
822        }
823        if (event.getModality() == null) {
824          event.setModality("none");
825        }
826        String modality = event.getModality();
827        event.setModality(modality.toLowerCase().replaceAll("_", " ").replaceAll("^'d$", "would"));
828        if (event.getPolarity() == null) {
829          event.setPolarity("POS");
830        }
831        if (event.getTense() == null) {
832          event.setTense("NONE");
833        }
834      }
835
836      // simplify simultaneous relations
837      for (TemporalLink tlink : JCasUtil.select(jCas, TemporalLink.class)) {
838        if (IS_SIMULTANEOUS.contains(tlink.getRelationType())) {
839          tlink.setRelationType("SIMULTANEOUS");
840        }
841      }
842      
843      // remove overlap relations
844      for (TemporalLink tlink : Lists.newArrayList(JCasUtil.select(jCas, TemporalLink.class))) {
845        if ("OVERLAP".equals(tlink.getRelationType())) {
846          tlink.removeFromIndexes();
847        }
848      }
849    }
850  }
851
852  public static class SetTemporalLinkIDs extends JCasAnnotator_ImplBase {
853
854    @Override
855    public void process(JCas jCas) throws AnalysisEngineProcessException {
856      int index = 1;
857      for (TemporalLink tlink : JCasUtil.select(jCas, TemporalLink.class)) {
858        tlink.setId(String.format("l%d", index));
859        ++index;
860      }
861
862    }
863  }
864
865  public static class ShrinkTimesContainingEvents extends JCasAnnotator_ImplBase {
866
867    @Override
868    public void process(JCas jCas) throws AnalysisEngineProcessException {
869      String text = jCas.getDocumentText();
870      for (Time time : JCasUtil.select(jCas, Time.class)) {
871        List<Event> events = JCasUtil.selectCovered(jCas, Event.class, time);
872        if (!events.isEmpty()) {
873          int eventsBegin = events.get(0).getBegin();
874          int eventsEnd = events.get(events.size() - 1).getEnd();
875          int timeBegin, timeEnd;
876          if (time.getBegin() - eventsBegin > time.getEnd() - eventsEnd) {
877            timeBegin = time.getBegin();
878            timeEnd = eventsBegin - 1;
879            while (timeEnd > timeBegin && Character.isWhitespace(text.charAt(timeEnd))) {
880              --timeEnd;
881            }
882          } else {
883            timeEnd = time.getEnd();
884            timeBegin = eventsEnd;
885            while (timeBegin < timeEnd && Character.isWhitespace(text.charAt(timeBegin))) {
886              ++timeBegin;
887            }
888          }
889          String oldText = time.getCoveredText();
890          time.setBegin(timeBegin);
891          time.setEnd(timeEnd);
892          String newText = time.getCoveredText();
893          this.getLogger().warn(String.format("shrinking \"%s\" to \"%s\"", oldText, newText));
894        }
895      }
896
897    }
898  }
899
900  public static class UseInferredTlinks extends JCasAnnotator_ImplBase {
901
902    public static final String PARAM_INFERRED_TLINKS_DIRECTORIES = "InferredTLinksDirectories";
903
904    @ConfigurationParameter(name = PARAM_INFERRED_TLINKS_DIRECTORIES, mandatory = true)
905    private List<File> inferredTLinksDirectories;
906    private Map<String, File> fileNameToFile;
907    
908
909    @Override
910    public void initialize(UimaContext context) throws ResourceInitializationException {
911      super.initialize(context);
912      this.fileNameToFile = Maps.newHashMap();
913      for (File dir : this.inferredTLinksDirectories) {
914        for (File file : dir.listFiles()) {
915          String fileName = file.getName();
916          if (fileName.endsWith(".tml")) {
917            String extension = String.format("[.]%s[.]tml$",  dir.getName());
918            this.fileNameToFile.put(fileName.replaceAll(extension, ".tml"), file);
919          }
920        }
921      }
922    }
923
924    @Override
925    public void process(JCas jCas) throws AnalysisEngineProcessException {
926      String fileName = new File(ViewUriUtil.getURI(jCas).getPath()).getName();
927      File inferredTLinksFile = this.fileNameToFile.get(fileName);
928
929      if (inferredTLinksFile == null) {
930        this.getLogger().warn("No inferred TLINKs found for " + fileName);
931      } else {
932
933        // remove existing temporal links
934        for (TemporalLink tlink : Lists.newArrayList(JCasUtil.select(jCas, TemporalLink.class))) {
935          tlink.removeFromIndexes();
936        }
937
938        // parse the XML document
939        SAXBuilder builder = new SAXBuilder();
940        Document xml;
941        try {
942          xml = builder.build(inferredTLinksFile);
943        } catch (JDOMException e) {
944          throw new AnalysisEngineProcessException(e);
945        } catch (IOException e) {
946          throw new AnalysisEngineProcessException(e);
947        }
948
949        // index all anchors by their IDs
950        Map<String, Anchor> idToAnchor = Maps.newHashMap();
951        for (Anchor anchor : JCasUtil.select(jCas, Anchor.class)) {
952          idToAnchor.put(anchor.getId(), anchor);
953          if (anchor instanceof Event) {
954            idToAnchor.put(((Event) anchor).getEventInstanceID(), anchor);
955          }
956        }
957
958        // create a TemporalLink for each TLINK in the file
959        int offset = jCas.getDocumentText().length();
960        for (Element linkElem : xml.getDescendants(Filters.element("TLINK"))) {
961          // get the relation
962          String relationType = linkElem.getAttributeValue("relType");
963          if (relationType == null) {
964            error(jCas, linkElem, "No relation type specified in %s");
965          }
966
967          // get the source
968          String sourceEventID = linkElem.getAttributeValue("eventInstanceID");
969          String sourceTimeID = linkElem.getAttributeValue("timeID");
970          if (!(sourceEventID == null ^ sourceTimeID == null)) {
971            error(jCas, linkElem, "Expected exactly 1 source attribute, found %s");
972          }
973          String sourceID = sourceEventID != null ? sourceEventID : sourceTimeID;
974          Anchor source = idToAnchor.get(sourceID);
975          if (source == null) {
976            this.getLogger().warn(
977                errorString(jCas, linkElem, "No annotation found for source of %s"));
978            continue;
979          }
980
981          // get the target
982          String targetEventID = linkElem.getAttributeValue("relatedToEventInstance");
983          String targetTimeID = linkElem.getAttributeValue("relatedToTime");
984          if (!(targetEventID == null ^ targetTimeID == null)) {
985            error(jCas, linkElem, "Expected exactly 1 target attribute, found %s");
986          }
987          String targetID = targetEventID != null ? targetEventID : targetTimeID;
988          Anchor target = idToAnchor.get(targetID);
989          if (target == null) {
990            this.getLogger().warn(
991                errorString(jCas, linkElem, "No annotation found for target of %s"));
992            continue;
993          }
994
995          // add the temporal link
996          TemporalLink link = new TemporalLink(jCas, offset, offset);
997          link.setRelationType(relationType);
998          link.setSource(source);
999          link.setTarget(target);
1000          link.addToIndexes();
1001        }
1002      }
1003    }
1004
1005    private static String errorString(JCas jCas, Element element, String message)
1006        throws AnalysisEngineProcessException {
1007      URI uri = ViewUriUtil.getURI(jCas);
1008      String elemString = new XMLOutputter().outputString(element);
1009      return String.format("In %s: " + message, uri, elemString);
1010    }
1011
1012    private static void error(JCas jCas, Element element, String message)
1013        throws AnalysisEngineProcessException {
1014      throw new IllegalArgumentException(errorString(jCas, element, message));
1015    }
1016  }
1017}