001/* 002 * Copyright (c) 2013, Regents of the University of Colorado 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions are met: 007 * 008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 011 * 012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 022 * POSSIBILITY OF SUCH DAMAGE. 023 */ 024package org.cleartk.corpus.timeml; 025 026import java.io.File; 027import java.io.FileOutputStream; 028import java.io.IOException; 029import java.util.ArrayList; 030import java.util.Collections; 031import java.util.List; 032import java.util.TreeMap; 033 034import org.apache.uima.analysis_engine.AnalysisEngineDescription; 035import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 036import org.apache.uima.cas.CAS; 037import org.apache.uima.cas.CASRuntimeException; 038import org.apache.uima.cas.FSIterator; 039import org.apache.uima.cas.Feature; 040import org.apache.uima.cas.FeatureStructure; 041import org.apache.uima.cas.Type; 042import org.apache.uima.cas.text.AnnotationFS; 043import org.apache.uima.jcas.JCas; 044import org.apache.uima.jcas.tcas.Annotation; 045import org.apache.uima.jcas.tcas.DocumentAnnotation; 046import org.apache.uima.resource.ResourceInitializationException; 047import org.cleartk.timeml.type.Anchor; 048import org.cleartk.timeml.type.DocumentCreationTime; 049import org.cleartk.timeml.type.Event; 050import org.cleartk.timeml.type.TemporalLink; 051import org.cleartk.timeml.type.Time; 052import org.cleartk.util.ViewUriUtil; 053import org.jdom2.Content; 054import org.jdom2.Element; 055import org.jdom2.Text; 056import org.jdom2.output.XMLOutputter; 057import org.apache.uima.fit.component.JCasAnnotator_ImplBase; 058import org.apache.uima.fit.descriptor.ConfigurationParameter; 059import org.apache.uima.fit.factory.AnalysisEngineFactory; 060 061import com.google.common.base.Function; 062import com.google.common.base.Objects; 063import com.google.common.base.Objects.ToStringHelper; 064import com.google.common.collect.Ordering; 065import com.google.common.primitives.Ints; 066 067/** 068 * Write events, times and temporal relations in the TempEval 2013 format. 069 * 070 * <br> 071 * Copyright (c) 2013, Regents of the University of Colorado <br> 072 * All rights reserved. 073 * 074 * @author Steven Bethard 075 */ 076public class TempEval2013Writer extends JCasAnnotator_ImplBase { 077 078 public static AnalysisEngineDescription getDescription(File outputDir) 079 throws ResourceInitializationException { 080 return AnalysisEngineFactory.createEngineDescription( 081 TempEval2013Writer.class, 082 PARAM_OUTPUT_DIRECTORY, 083 outputDir); 084 } 085 086 public static final String PARAM_OUTPUT_DIRECTORY = "outputDirectory"; 087 088 @ConfigurationParameter( 089 name = PARAM_OUTPUT_DIRECTORY, 090 description = "Provides the path where the TimeML documents should be written.", 091 mandatory = true) 092 private File outputDirectory; 093 094 @Override 095 public void process(JCas jCas) throws AnalysisEngineProcessException { 096 String text = jCas.getDocumentText(); 097 098 // collect annotations that will be written and sort them by decreasing begin and increasing end 099 Ordering<Span> spanOrdering = Span.BY_DECREASING_BEGIN.compound(Span.BY_INCREASING_END); 100 List<AnnotationFS> annotations = this.getAnnotations(jCas); 101 Collections.sort(annotations, spanOrdering.onResultOf(new Function<AnnotationFS, Span>() { 102 @Override 103 public Span apply(AnnotationFS annotation) { 104 return new Span(annotation); 105 } 106 })); 107 108 // create an XML element for each annotation; each annotation's children will already be 109 // complete because of the sorting order 110 TreeMap<Span, List<Element>> spanToElements = new TreeMap<Span, List<Element>>(spanOrdering); 111 for (AnnotationFS annotation : annotations) { 112 113 // collect the spans covered by this annotation 114 // (assume annotations with 0-spans do not cover any annotations) 115 List<Span> coveredSpans = new ArrayList<Span>(); 116 if (annotation.getBegin() != annotation.getEnd()) { 117 for (Span span : spanToElements.headMap(new Span(annotation), true).keySet()) { 118 if (annotation.getBegin() <= span.begin && span.end <= annotation.getEnd()) { 119 coveredSpans.add(span); 120 } 121 } 122 } 123 124 // collect begin and end points for all the covered XML elements, in left-to-right order 125 List<Integer> boundaries = new ArrayList<Integer>(); 126 boundaries.add(annotation.getBegin()); 127 boundaries.add(annotation.getEnd()); 128 for (Span span : coveredSpans) { 129 boundaries.add(span.begin); 130 boundaries.add(span.end); 131 } 132 Collections.sort(boundaries); 133 134 // create the children - first a text node, then an element, then a text node, etc. 135 List<Content> children = new ArrayList<Content>(); 136 for (int i = 0; i < boundaries.size() - 1; ++i) { 137 int begin = boundaries.get(i); 138 int end = boundaries.get(i + 1); 139 140 // text node between elements 141 if (i % 2 == 0) { 142 children.add(new Text(text.substring(begin, end))); 143 } 144 145 // element that has already been completed 146 else { 147 children.addAll(spanToElements.remove(new Span(begin, end))); 148 } 149 } 150 151 // convert the annotation to an element and add the children 152 Element element = this.toElement(annotation); 153 element.addContent(children); 154 155 // map the annotation's span to the newly created elements 156 Span span = new Span(annotation); 157 if (!spanToElements.containsKey(span)) { 158 spanToElements.put(span, new ArrayList<Element>()); 159 } 160 spanToElements.get(span).add(element); 161 } 162 163 // the root will be the only remaining element in the map, and will span the entire text 164 Span rootSpan = new Span(0, text.length()); 165 List<Element> rootElements = spanToElements.get(rootSpan); 166 if (rootElements == null || rootElements.size() != 1) { 167 throw new IllegalArgumentException("Expected exactly one root, found " + spanToElements); 168 } 169 Element root = rootElements.get(0); 170 171 // write the XML to the output file 172 XMLOutputter outputter = new XMLOutputter(); 173 String fileName = new File(ViewUriUtil.getURI(jCas).getPath()).getName(); 174 String inputSuffix = ".TE3input"; 175 if (fileName.endsWith(inputSuffix)) { 176 fileName = fileName.substring(0, fileName.length() - inputSuffix.length()); 177 } 178 if (!fileName.endsWith(".tml")) { 179 fileName += ".tml"; 180 } 181 if (!this.outputDirectory.exists()) { 182 this.outputDirectory.mkdirs(); 183 } 184 File outputFile = new File(this.outputDirectory, fileName); 185 try { 186 FileOutputStream outputStream = new FileOutputStream(outputFile); 187 try { 188 outputter.output(root, outputStream); 189 } finally { 190 outputStream.close(); 191 } 192 } catch (IOException e) { 193 throw new AnalysisEngineProcessException(e); 194 } 195 } 196 197 protected List<AnnotationFS> getAnnotations(JCas jCas) { 198 int makeInstanceOffset = jCas.getDocumentText().length(); 199 List<AnnotationFS> annotations = new ArrayList<AnnotationFS>(); 200 FSIterator<Annotation> iterator = jCas.getAnnotationIndex().iterator(); 201 while (iterator.isValid() && iterator.hasNext()) { 202 Annotation annotation = iterator.next(); 203 if (annotation instanceof DocumentAnnotation || annotation instanceof org.cleartk.timeml.type.Text || annotation instanceof Event || annotation instanceof Time || annotation instanceof TemporalLink) { 204 annotations.add(annotation); 205 if (annotation instanceof DocumentCreationTime) { 206 annotations.add(new DCT((DocumentCreationTime) annotation)); 207 } 208 if (annotation instanceof Event) { 209 annotations.add(new MakeInstance((Event) annotation, makeInstanceOffset)); 210 } 211 } 212 } 213 return annotations; 214 } 215 216 protected Element toElement(AnnotationFS annotation) { 217 Element element; 218 if (annotation instanceof DocumentAnnotation) { 219 element = new Element("TimeML"); 220 } else if (annotation instanceof DCT) { 221 element = new Element("DCT"); 222 } else if (annotation instanceof org.cleartk.timeml.type.Text) { 223 element = new Element("TEXT"); 224 } else if (annotation instanceof Event) { 225 Event event = (Event) annotation; 226 element = new Element("EVENT"); 227 element.setAttribute("eid", event.getId()); 228 element.setAttribute("class", nullToEmpty(event.getEventClass())); 229 element.setAttribute("tense", nullToEmpty(event.getTense())); 230 element.setAttribute("aspect", nullToEmpty(event.getAspect())); 231 element.setAttribute("polarity", nullToEmpty(event.getPolarity())); 232 element.setAttribute("modality", nullToEmpty(event.getModality())); 233 } else if (annotation instanceof MakeInstance) { 234 MakeInstance makeInstance = (MakeInstance) annotation; 235 element = new Element("MAKEINSTANCE"); 236 element.setAttribute("eiid", makeInstance.annotation.getEventInstanceID()); 237 element.setAttribute("eventID", makeInstance.annotation.getId()); 238 } else if (annotation instanceof Time) { 239 Time time = (Time) annotation; 240 element = new Element("TIMEX3"); 241 element.setAttribute("tid", time.getId()); 242 element.setAttribute("type", nullToEmpty(time.getTimeType())); 243 element.setAttribute("value", nullToEmpty(time.getValue())); 244 } else if (annotation instanceof TemporalLink) { 245 TemporalLink tlink = (TemporalLink) annotation; 246 Anchor source = tlink.getSource(); 247 Anchor target = tlink.getTarget(); 248 element = new Element("TLINK"); 249 element.setAttribute("lid", tlink.getId()); 250 element.setAttribute("relType", tlink.getRelationType()); 251 if (source instanceof Event) { 252 Event event = (Event) source; 253 element.setAttribute("eventInstanceID", event.getEventInstanceID()); 254 } else if (source instanceof Time) { 255 element.setAttribute("timeID", source.getId()); 256 } 257 if (target instanceof Event) { 258 Event event = (Event) target; 259 element.setAttribute("relatedToEventInstance", event.getEventInstanceID()); 260 } else if (target instanceof Time) { 261 element.setAttribute("relatedToTime", target.getId()); 262 } 263 } else { 264 throw new IllegalArgumentException("Unsupported annotation type: " + annotation); 265 } 266 return element; 267 } 268 269 private static String nullToEmpty(String string) { 270 if (string == null) { 271 string = ""; 272 } 273 return string; 274 } 275 276 private static class Span implements Comparable<Span>{ 277 int begin; 278 int end; 279 280 public Span(int begin, int end) { 281 this.begin = begin; 282 this.end = end; 283 } 284 285 public Span(AnnotationFS annotation) { 286 this(annotation.getBegin(), annotation.getEnd()); 287 } 288 289 @Override 290 public String toString() { 291 ToStringHelper helper = Objects.toStringHelper(this.getClass()); 292 return helper.add("begin", this.begin).add("end", this.end).toString(); 293 } 294 295 @Override 296 public int hashCode() { 297 return Objects.hashCode(this.begin, this.end); 298 } 299 300 @Override 301 public boolean equals(Object obj) { 302 boolean result = false; 303 if (obj.getClass().equals(Span.class)) { 304 Span that = (Span)obj; 305 result = this.begin == that.begin && this.end == that.end; 306 } 307 return result; 308 } 309 310 @Override 311 public int compareTo(Span that) { 312 int compare = Ints.compare(this.begin, that.begin); 313 if (compare != 0) { 314 compare = Ints.compare(this.end, that.end); 315 } 316 return compare; 317 } 318 319 static Ordering<Span> BY_DECREASING_BEGIN = new Ordering<Span>() { 320 @Override 321 public int compare(Span left, Span right) { 322 return -Ints.compare(left.begin, right.begin); 323 } 324 }; 325 326 static Ordering<Span> BY_INCREASING_END = new Ordering<Span>() { 327 @Override 328 public int compare(Span left, Span right) { 329 return Ints.compare(left.end, right.end); 330 } 331 }; 332 } 333 334 private static class MakeInstance extends FakeAnnotation<Event> { 335 public MakeInstance(Event annotation, int offset) { 336 super(annotation, offset, offset); 337 } 338 } 339 340 private static class DCT extends FakeAnnotation<Time> { 341 public DCT(Time time) { 342 super(time, time.getBegin(), time.getEnd()); 343 } 344 } 345 346 // Fake annotation for various elements 347 private static class FakeAnnotation<T extends Annotation> implements AnnotationFS { 348 349 T annotation; 350 private int begin; 351 private int end; 352 353 public FakeAnnotation(T annotation, int begin, int end) { 354 this.annotation = annotation; 355 this.begin = begin; 356 this.end = end; 357 } 358 359 @Override 360 public Object clone() { 361 throw new UnsupportedOperationException(); 362 } 363 364 @Override 365 public CAS getView() { 366 throw new UnsupportedOperationException(); 367 } 368 369 @Override 370 public Type getType() { 371 throw new UnsupportedOperationException(); 372 } 373 374 @Override 375 public void setFeatureValue(Feature feat, FeatureStructure fs) throws CASRuntimeException { 376 throw new UnsupportedOperationException(); 377 } 378 379 @Override 380 public FeatureStructure getFeatureValue(Feature feat) throws CASRuntimeException { 381 throw new UnsupportedOperationException(); 382 } 383 384 @Override 385 public void setStringValue(Feature feat, String s) throws CASRuntimeException { 386 throw new UnsupportedOperationException(); 387 } 388 389 @Override 390 public String getStringValue(Feature f) throws CASRuntimeException { 391 throw new UnsupportedOperationException(); 392 } 393 394 @Override 395 public float getFloatValue(Feature feat) throws CASRuntimeException { 396 throw new UnsupportedOperationException(); 397 } 398 399 @Override 400 public void setFloatValue(Feature feat, float f) throws CASRuntimeException { 401 throw new UnsupportedOperationException(); 402 } 403 404 @Override 405 public int getIntValue(Feature feat) throws CASRuntimeException { 406 throw new UnsupportedOperationException(); 407 } 408 409 @Override 410 public void setIntValue(Feature feat, int i) throws CASRuntimeException { 411 throw new UnsupportedOperationException(); 412 } 413 414 @Override 415 public byte getByteValue(Feature feat) throws CASRuntimeException { 416 throw new UnsupportedOperationException(); 417 } 418 419 @Override 420 public void setByteValue(Feature feat, byte i) throws CASRuntimeException { 421 throw new UnsupportedOperationException(); 422 } 423 424 @Override 425 public boolean getBooleanValue(Feature feat) throws CASRuntimeException { 426 throw new UnsupportedOperationException(); 427 } 428 429 @Override 430 public void setBooleanValue(Feature feat, boolean i) throws CASRuntimeException { 431 throw new UnsupportedOperationException(); 432 } 433 434 @Override 435 public short getShortValue(Feature feat) throws CASRuntimeException { 436 throw new UnsupportedOperationException(); 437 } 438 439 @Override 440 public void setShortValue(Feature feat, short i) throws CASRuntimeException { 441 throw new UnsupportedOperationException(); 442 } 443 444 @Override 445 public long getLongValue(Feature feat) throws CASRuntimeException { 446 throw new UnsupportedOperationException(); 447 } 448 449 @Override 450 public void setLongValue(Feature feat, long i) throws CASRuntimeException { 451 throw new UnsupportedOperationException(); 452 } 453 454 @Override 455 public double getDoubleValue(Feature feat) throws CASRuntimeException { 456 throw new UnsupportedOperationException(); 457 } 458 459 @Override 460 public void setDoubleValue(Feature feat, double i) throws CASRuntimeException { 461 throw new UnsupportedOperationException(); 462 } 463 464 @Override 465 public String getFeatureValueAsString(Feature feat) throws CASRuntimeException { 466 throw new UnsupportedOperationException(); 467 } 468 469 @Override 470 public void setFeatureValueFromString(Feature feat, String s) throws CASRuntimeException { 471 throw new UnsupportedOperationException(); 472 } 473 474 @Override 475 public CAS getCAS() { 476 throw new UnsupportedOperationException(); 477 } 478 479 @Override 480 public int getBegin() { 481 return this.begin; 482 } 483 484 @Override 485 public int getEnd() { 486 return this.end; 487 } 488 489 @Override 490 public String getCoveredText() { 491 throw new UnsupportedOperationException(); 492 } 493 } 494}