001/*
002 * Copyright (c) 2013, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.corpus.timeml;
025
026import java.io.File;
027import java.io.FileOutputStream;
028import java.io.IOException;
029import java.util.ArrayList;
030import java.util.Collections;
031import java.util.List;
032import java.util.TreeMap;
033
034import org.apache.uima.analysis_engine.AnalysisEngineDescription;
035import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
036import org.apache.uima.cas.CAS;
037import org.apache.uima.cas.CASRuntimeException;
038import org.apache.uima.cas.FSIterator;
039import org.apache.uima.cas.Feature;
040import org.apache.uima.cas.FeatureStructure;
041import org.apache.uima.cas.Type;
042import org.apache.uima.cas.text.AnnotationFS;
043import org.apache.uima.jcas.JCas;
044import org.apache.uima.jcas.tcas.Annotation;
045import org.apache.uima.jcas.tcas.DocumentAnnotation;
046import org.apache.uima.resource.ResourceInitializationException;
047import org.cleartk.timeml.type.Anchor;
048import org.cleartk.timeml.type.DocumentCreationTime;
049import org.cleartk.timeml.type.Event;
050import org.cleartk.timeml.type.TemporalLink;
051import org.cleartk.timeml.type.Time;
052import org.cleartk.util.ViewUriUtil;
053import org.jdom2.Content;
054import org.jdom2.Element;
055import org.jdom2.Text;
056import org.jdom2.output.XMLOutputter;
057import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
058import org.apache.uima.fit.descriptor.ConfigurationParameter;
059import org.apache.uima.fit.factory.AnalysisEngineFactory;
060
061import com.google.common.base.Function;
062import com.google.common.base.Objects;
063import com.google.common.base.Objects.ToStringHelper;
064import com.google.common.collect.Ordering;
065import com.google.common.primitives.Ints;
066
067/**
068 * Write events, times and temporal relations in the TempEval 2013 format.
069 * 
070 * <br>
071 * Copyright (c) 2013, Regents of the University of Colorado <br>
072 * All rights reserved.
073 * 
074 * @author Steven Bethard
075 */
076public class TempEval2013Writer extends JCasAnnotator_ImplBase {
077  
078  public static AnalysisEngineDescription getDescription(File outputDir)
079      throws ResourceInitializationException {
080    return AnalysisEngineFactory.createEngineDescription(
081        TempEval2013Writer.class,
082        PARAM_OUTPUT_DIRECTORY,
083        outputDir);
084  }
085
086  public static final String PARAM_OUTPUT_DIRECTORY = "outputDirectory";
087
088  @ConfigurationParameter(
089      name = PARAM_OUTPUT_DIRECTORY,
090      description = "Provides the path where the TimeML documents should be written.",
091      mandatory = true)
092  private File outputDirectory;
093
094  @Override
095  public void process(JCas jCas) throws AnalysisEngineProcessException {
096    String text = jCas.getDocumentText();
097    
098    // collect annotations that will be written and sort them by decreasing begin and increasing end
099    Ordering<Span> spanOrdering = Span.BY_DECREASING_BEGIN.compound(Span.BY_INCREASING_END);
100    List<AnnotationFS> annotations = this.getAnnotations(jCas);
101    Collections.sort(annotations, spanOrdering.onResultOf(new Function<AnnotationFS, Span>() {
102      @Override
103      public Span apply(AnnotationFS annotation) {
104        return new Span(annotation);
105      }
106    }));
107    
108    // create an XML element for each annotation; each annotation's children will already be
109    // complete because of the sorting order
110    TreeMap<Span, List<Element>> spanToElements = new TreeMap<Span, List<Element>>(spanOrdering); 
111    for (AnnotationFS annotation : annotations) {
112      
113      // collect the spans covered by this annotation
114      // (assume annotations with 0-spans do not cover any annotations) 
115      List<Span> coveredSpans = new ArrayList<Span>();
116      if (annotation.getBegin() != annotation.getEnd()) {
117        for (Span span : spanToElements.headMap(new Span(annotation), true).keySet()) {
118          if (annotation.getBegin() <= span.begin && span.end <= annotation.getEnd()) {
119            coveredSpans.add(span);
120          }
121        }
122      }
123      
124      // collect begin and end points for all the covered XML elements, in left-to-right order
125      List<Integer> boundaries = new ArrayList<Integer>();
126      boundaries.add(annotation.getBegin());
127      boundaries.add(annotation.getEnd());
128      for (Span span : coveredSpans) {
129        boundaries.add(span.begin);
130        boundaries.add(span.end);
131      }
132      Collections.sort(boundaries);
133      
134      // create the children - first a text node, then an element, then a text node, etc.
135      List<Content> children = new ArrayList<Content>();
136      for (int i = 0; i < boundaries.size() - 1; ++i) {
137        int begin = boundaries.get(i);
138        int end = boundaries.get(i + 1);
139        
140        // text node between elements
141        if (i % 2 == 0) {
142          children.add(new Text(text.substring(begin, end)));
143        }
144        
145        // element that has already been completed
146        else {
147          children.addAll(spanToElements.remove(new Span(begin, end)));
148        }
149      }
150      
151      // convert the annotation to an element and add the children
152      Element element = this.toElement(annotation);
153      element.addContent(children);
154
155      // map the annotation's span to the newly created elements 
156      Span span = new Span(annotation);
157      if (!spanToElements.containsKey(span)) {
158        spanToElements.put(span, new ArrayList<Element>());
159      }
160      spanToElements.get(span).add(element);
161    }
162    
163    // the root will be the only remaining element in the map, and will span the entire text
164    Span rootSpan = new Span(0, text.length());
165    List<Element> rootElements = spanToElements.get(rootSpan);
166    if (rootElements == null || rootElements.size() != 1) {
167      throw new IllegalArgumentException("Expected exactly one root, found " + spanToElements);
168    }
169    Element root = rootElements.get(0);
170    
171    // write the XML to the output file
172    XMLOutputter outputter = new XMLOutputter();
173    String fileName = new File(ViewUriUtil.getURI(jCas).getPath()).getName();
174    String inputSuffix = ".TE3input";
175    if (fileName.endsWith(inputSuffix)) {
176      fileName = fileName.substring(0, fileName.length() - inputSuffix.length());
177    }
178    if (!fileName.endsWith(".tml")) {
179      fileName += ".tml";
180    }
181    if (!this.outputDirectory.exists()) {
182      this.outputDirectory.mkdirs();
183    }
184    File outputFile = new File(this.outputDirectory, fileName);
185    try {
186      FileOutputStream outputStream = new FileOutputStream(outputFile);
187      try {
188        outputter.output(root, outputStream);
189      } finally {
190        outputStream.close();
191      }
192    } catch (IOException e) {
193      throw new AnalysisEngineProcessException(e);
194    }
195  }
196  
197  protected List<AnnotationFS> getAnnotations(JCas jCas) {
198    int makeInstanceOffset = jCas.getDocumentText().length();
199    List<AnnotationFS> annotations = new ArrayList<AnnotationFS>();
200    FSIterator<Annotation> iterator = jCas.getAnnotationIndex().iterator();
201    while (iterator.isValid() && iterator.hasNext()) {
202      Annotation annotation = iterator.next();
203      if (annotation instanceof DocumentAnnotation || annotation instanceof org.cleartk.timeml.type.Text || annotation instanceof Event || annotation instanceof Time  || annotation instanceof TemporalLink) {
204        annotations.add(annotation);
205        if (annotation instanceof DocumentCreationTime) {
206          annotations.add(new DCT((DocumentCreationTime) annotation));
207        }
208        if (annotation instanceof Event) {
209          annotations.add(new MakeInstance((Event) annotation, makeInstanceOffset));
210        }
211      }
212    }
213    return annotations;
214  }
215  
216  protected Element toElement(AnnotationFS annotation) {
217    Element element;
218    if (annotation instanceof DocumentAnnotation) {
219      element = new Element("TimeML");
220    } else if (annotation instanceof DCT) {
221      element = new Element("DCT");
222    } else if (annotation instanceof org.cleartk.timeml.type.Text) {
223      element = new Element("TEXT");
224    } else if (annotation instanceof Event) {
225      Event event = (Event) annotation;
226      element = new Element("EVENT");
227      element.setAttribute("eid", event.getId());
228      element.setAttribute("class", nullToEmpty(event.getEventClass()));
229      element.setAttribute("tense", nullToEmpty(event.getTense()));
230      element.setAttribute("aspect", nullToEmpty(event.getAspect()));
231      element.setAttribute("polarity", nullToEmpty(event.getPolarity()));
232      element.setAttribute("modality", nullToEmpty(event.getModality()));
233    } else if (annotation instanceof MakeInstance) {
234      MakeInstance makeInstance = (MakeInstance) annotation;
235      element = new Element("MAKEINSTANCE");
236      element.setAttribute("eiid", makeInstance.annotation.getEventInstanceID());
237      element.setAttribute("eventID", makeInstance.annotation.getId());
238    } else if (annotation instanceof Time) {
239      Time time = (Time) annotation;
240      element = new Element("TIMEX3");
241      element.setAttribute("tid", time.getId());
242      element.setAttribute("type", nullToEmpty(time.getTimeType()));
243      element.setAttribute("value", nullToEmpty(time.getValue()));
244    } else if (annotation instanceof TemporalLink) {
245      TemporalLink tlink = (TemporalLink) annotation;
246      Anchor source = tlink.getSource();
247      Anchor target = tlink.getTarget();
248      element = new Element("TLINK");
249      element.setAttribute("lid", tlink.getId());
250      element.setAttribute("relType", tlink.getRelationType());
251      if (source instanceof Event) {
252        Event event = (Event) source;
253        element.setAttribute("eventInstanceID", event.getEventInstanceID());
254      } else if (source instanceof Time) {
255        element.setAttribute("timeID", source.getId());
256      }
257      if (target instanceof Event) {
258        Event event = (Event) target;
259        element.setAttribute("relatedToEventInstance", event.getEventInstanceID());
260      } else if (target instanceof Time) {
261        element.setAttribute("relatedToTime", target.getId());
262      }
263    } else {
264      throw new IllegalArgumentException("Unsupported annotation type: " + annotation);
265    }
266    return element;
267  }
268  
269  private static String nullToEmpty(String string) {
270    if (string == null) {
271      string = "";
272    }
273    return string;
274  }
275
276  private static class Span implements Comparable<Span>{
277    int begin;
278    int end;
279
280    public Span(int begin, int end) {
281      this.begin = begin;
282      this.end = end;
283    }
284    
285    public Span(AnnotationFS annotation) {
286      this(annotation.getBegin(), annotation.getEnd());
287    }
288    
289    @Override
290    public String toString() {
291      ToStringHelper helper = Objects.toStringHelper(this.getClass()); 
292      return helper.add("begin", this.begin).add("end", this.end).toString();
293    }
294
295    @Override
296    public int hashCode() {
297      return Objects.hashCode(this.begin, this.end);
298    }
299
300    @Override
301    public boolean equals(Object obj) {
302      boolean result = false;
303      if (obj.getClass().equals(Span.class)) {
304        Span that = (Span)obj;
305        result = this.begin == that.begin && this.end == that.end; 
306      }
307      return result;
308    }
309
310    @Override
311    public int compareTo(Span that) {
312      int compare = Ints.compare(this.begin, that.begin);
313      if (compare != 0) {
314        compare = Ints.compare(this.end, that.end);
315      }
316      return compare;
317    }
318    
319    static Ordering<Span> BY_DECREASING_BEGIN = new Ordering<Span>() {
320      @Override
321      public int compare(Span left, Span right) {
322        return -Ints.compare(left.begin, right.begin);
323      }
324    };
325    
326    static Ordering<Span> BY_INCREASING_END = new Ordering<Span>() {
327      @Override
328      public int compare(Span left, Span right) {
329        return Ints.compare(left.end, right.end);
330      }
331    };
332  }
333  
334  private static class MakeInstance extends FakeAnnotation<Event> {
335    public MakeInstance(Event annotation, int offset) {
336      super(annotation, offset, offset);
337    }
338  }
339  
340  private static class DCT extends FakeAnnotation<Time> {
341    public DCT(Time time) {
342      super(time, time.getBegin(), time.getEnd());
343    }
344  }
345  
346  // Fake annotation for various elements
347  private static class FakeAnnotation<T extends Annotation> implements AnnotationFS {
348    
349    T annotation;
350    private int begin;
351    private int end;
352
353    public FakeAnnotation(T annotation, int begin, int end) {
354      this.annotation = annotation;
355      this.begin = begin;
356      this.end = end;
357    }
358
359    @Override
360    public Object clone() {
361      throw new UnsupportedOperationException();
362    }
363
364    @Override
365    public CAS getView() {
366      throw new UnsupportedOperationException();
367    }
368
369    @Override
370    public Type getType() {
371      throw new UnsupportedOperationException();
372    }
373
374    @Override
375    public void setFeatureValue(Feature feat, FeatureStructure fs) throws CASRuntimeException {
376      throw new UnsupportedOperationException();
377    }
378
379    @Override
380    public FeatureStructure getFeatureValue(Feature feat) throws CASRuntimeException {
381      throw new UnsupportedOperationException();
382    }
383
384    @Override
385    public void setStringValue(Feature feat, String s) throws CASRuntimeException {
386      throw new UnsupportedOperationException();
387    }
388
389    @Override
390    public String getStringValue(Feature f) throws CASRuntimeException {
391      throw new UnsupportedOperationException();
392    }
393
394    @Override
395    public float getFloatValue(Feature feat) throws CASRuntimeException {
396      throw new UnsupportedOperationException();
397    }
398
399    @Override
400    public void setFloatValue(Feature feat, float f) throws CASRuntimeException {
401      throw new UnsupportedOperationException();
402    }
403
404    @Override
405    public int getIntValue(Feature feat) throws CASRuntimeException {
406      throw new UnsupportedOperationException();
407    }
408
409    @Override
410    public void setIntValue(Feature feat, int i) throws CASRuntimeException {
411      throw new UnsupportedOperationException();
412    }
413
414    @Override
415    public byte getByteValue(Feature feat) throws CASRuntimeException {
416      throw new UnsupportedOperationException();
417    }
418
419    @Override
420    public void setByteValue(Feature feat, byte i) throws CASRuntimeException {
421      throw new UnsupportedOperationException();
422    }
423
424    @Override
425    public boolean getBooleanValue(Feature feat) throws CASRuntimeException {
426      throw new UnsupportedOperationException();
427    }
428
429    @Override
430    public void setBooleanValue(Feature feat, boolean i) throws CASRuntimeException {
431      throw new UnsupportedOperationException();
432    }
433
434    @Override
435    public short getShortValue(Feature feat) throws CASRuntimeException {
436      throw new UnsupportedOperationException();
437    }
438
439    @Override
440    public void setShortValue(Feature feat, short i) throws CASRuntimeException {
441      throw new UnsupportedOperationException();
442    }
443
444    @Override
445    public long getLongValue(Feature feat) throws CASRuntimeException {
446      throw new UnsupportedOperationException();
447    }
448
449    @Override
450    public void setLongValue(Feature feat, long i) throws CASRuntimeException {
451      throw new UnsupportedOperationException();
452    }
453
454    @Override
455    public double getDoubleValue(Feature feat) throws CASRuntimeException {
456      throw new UnsupportedOperationException();
457    }
458
459    @Override
460    public void setDoubleValue(Feature feat, double i) throws CASRuntimeException {
461      throw new UnsupportedOperationException();
462    }
463
464    @Override
465    public String getFeatureValueAsString(Feature feat) throws CASRuntimeException {
466      throw new UnsupportedOperationException();
467    }
468
469    @Override
470    public void setFeatureValueFromString(Feature feat, String s) throws CASRuntimeException {
471      throw new UnsupportedOperationException();
472    }
473
474    @Override
475    public CAS getCAS() {
476      throw new UnsupportedOperationException();
477    }
478
479    @Override
480    public int getBegin() {
481      return this.begin;
482    }
483
484    @Override
485    public int getEnd() {
486      return this.end;
487    }
488
489    @Override
490    public String getCoveredText() {
491      throw new UnsupportedOperationException();
492    }
493  }
494}