001/*
002 * Copyright (c) 2011, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.corpus.timeml;
025
026import java.util.ArrayList;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030
031import org.apache.uima.analysis_engine.AnalysisEngineDescription;
032import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
033import org.apache.uima.cas.CAS;
034import org.apache.uima.cas.CASException;
035import org.apache.uima.jcas.JCas;
036import org.apache.uima.jcas.tcas.Annotation;
037import org.apache.uima.resource.ResourceInitializationException;
038import org.cleartk.timeml.type.Anchor;
039import org.cleartk.timeml.type.DocumentCreationTime;
040import org.cleartk.timeml.type.Event;
041import org.cleartk.timeml.type.TemporalLink;
042import org.cleartk.timeml.type.Time;
043import org.cleartk.token.type.Sentence;
044import org.cleartk.token.type.Token;
045import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
046import org.apache.uima.fit.descriptor.ConfigurationParameter;
047import org.apache.uima.fit.factory.AnalysisEngineFactory;
048import org.apache.uima.fit.util.JCasUtil;
049
050import com.google.common.base.Joiner;
051import com.google.common.collect.ArrayListMultimap;
052import com.google.common.collect.ListMultimap;
053
054/**
055 * <br>
056 * Copyright (c) 2011, Regents of the University of Colorado <br>
057 * All rights reserved.
058 * 
059 * @author Steven Bethard
060 */
061public class TempEval2010GoldAnnotator extends JCasAnnotator_ImplBase {
062
063  public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
064    return AnalysisEngineFactory.createEngineDescription(TempEval2010GoldAnnotator.class);
065  }
066
067  @ConfigurationParameter(
068      name = PARAM_TEXT_VIEWS,
069      mandatory = false,
070      defaultValue = CAS.NAME_DEFAULT_SOFA,
071      description = "Views where document text should be placed")
072  private String[] textViews;
073
074  @ConfigurationParameter(
075      name = PARAM_DOCUMENT_CREATION_TIME_VIEWS,
076      mandatory = false,
077      defaultValue = CAS.NAME_DEFAULT_SOFA,
078      description = "Views where DocumentCreationTime annotations should be placed")
079  private String[] documentCreationTimeViews;
080
081  @ConfigurationParameter(
082      name = PARAM_TIME_EXTENT_VIEWS,
083      mandatory = false,
084      defaultValue = CAS.NAME_DEFAULT_SOFA,
085      description = "Views where Time annotations should be placed")
086  private String[] timeExtentViews;
087
088  @ConfigurationParameter(
089      name = PARAM_TIME_ATTRIBUTE_VIEWS,
090      mandatory = false,
091      defaultValue = CAS.NAME_DEFAULT_SOFA,
092      description = "Views where Time annotation attributes should be placed")
093  private String[] timeAttributeViews;
094
095  @ConfigurationParameter(
096      name = PARAM_EVENT_EXTENT_VIEWS,
097      mandatory = false,
098      defaultValue = CAS.NAME_DEFAULT_SOFA,
099      description = "Views where Event annotations should be placed")
100  private String[] eventExtentViews;
101
102  @ConfigurationParameter(
103      name = PARAM_EVENT_ATTRIBUTE_VIEWS,
104      mandatory = false,
105      defaultValue = CAS.NAME_DEFAULT_SOFA,
106      description = "Views where Event annotation attributes should be placed")
107  private String[] eventAttributeViews;
108
109  @ConfigurationParameter(
110      name = PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEWS,
111      mandatory = false,
112      defaultValue = CAS.NAME_DEFAULT_SOFA,
113      description = "Views where TemporalLink annotations between events and the document creation time should be placed")
114  private String[] temporalLinkEventToDocumentCreationTimeViews;
115
116  @ConfigurationParameter(
117      name = PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEWS,
118      mandatory = false,
119      defaultValue = CAS.NAME_DEFAULT_SOFA,
120      description = "Views where TemporalLink annotations between events and times within the same sentence should be placed")
121  private String[] temporalLinkEventToSameSentenceTimeViews;
122
123  @ConfigurationParameter(
124      name = PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEWS,
125      mandatory = false,
126      defaultValue = CAS.NAME_DEFAULT_SOFA,
127      description = "Views where TemporalLink annotations between events and syntactically dominated events should be placed")
128  private String[] temporalLinkEventToSubordinatedEventViews;
129
130  @ConfigurationParameter(
131      name = PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEWS,
132      mandatory = false,
133      defaultValue = CAS.NAME_DEFAULT_SOFA,
134      description = "Views where TemporalLink annotations between main events in adjacent sentences should be placed")
135  private String[] temporalLinkMainEventToNextSentenceMainEventViews;
136
137  public static final String PARAM_TEXT_VIEWS = "textViews";
138
139  public static final String PARAM_DOCUMENT_CREATION_TIME_VIEWS = "documentCreationTimeViews";
140
141  public static final String PARAM_TIME_EXTENT_VIEWS = "timeExtentViews";
142
143  public static final String PARAM_TIME_ATTRIBUTE_VIEWS = "timeAttributeViews";
144
145  public static final String PARAM_EVENT_EXTENT_VIEWS = "eventExtentViews";
146
147  public static final String PARAM_EVENT_ATTRIBUTE_VIEWS = "eventAttributeViews";
148
149  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEWS = "temporalLinkEventToDocumentCreationTimeViews";
150
151  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEWS = "temporalLinkEventToSameSentenceTimeViews";
152
153  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEWS = "temporalLinkEventToSubordinatedEventViews";
154
155  public static final String PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEWS = "temporalLinkMainEventToNextSentenceMainEventViews";
156
157  @Override
158  public void process(JCas jCas) throws AnalysisEngineProcessException {
159
160    // load the sentences and tokens from the view
161    ListMultimap<Integer, String> sentTokens = ArrayListMultimap.create();
162    for (String line : lines(jCas, TempEval2010CollectionReader.BASE_SEGMENTATION_VIEW_NAME)) {
163      String[] columns = split(line, "<filename>", "<sent_no>", "<token_no>", "<text>");
164      int sentIndex = new Integer(columns[1]);
165      String text = columns[3];
166      sentTokens.put(sentIndex, text);
167    }
168
169    // create the sentences and tokens
170    Map<String, StringBuilder> textBuilders = new HashMap<String, StringBuilder>();
171    for (String viewName : this.textViews) {
172      StringBuilder textBuilder = new StringBuilder("\n\n"); // leave line for document time
173      JCas view = JCasUtil.getView(jCas, viewName, true);
174      for (int i = 0; i < sentTokens.keySet().size(); ++i) {
175        int sentBegin = textBuilder.length();
176        List<Token> tokens = new ArrayList<Token>();
177        for (String tokenText : sentTokens.get(i)) {
178          int tokenBegin = textBuilder.length();
179          textBuilder.append(tokenText);
180          int tokenEnd = textBuilder.length();
181          textBuilder.append(' ');
182          Token token = new Token(view, tokenBegin, tokenEnd);
183          token.addToIndexes();
184          tokens.add(token);
185        }
186        int sentEnd = textBuilder.length() - 1;
187        textBuilder.setCharAt(sentEnd, '\n');
188        Sentence sentence = new Sentence(view, sentBegin, sentEnd);
189        sentence.addToIndexes();
190      }
191      textBuilders.put(viewName, textBuilder);
192    }
193
194    // add the document creation time
195    for (String line : lines(jCas, TempEval2010CollectionReader.DCT_VIEW_NAME)) {
196      String[] dctColumns = split(line, "<filename>", "<dct>");
197      String dctValue = dctColumns[1].replaceAll("(\\d{4})(\\d{2})(\\d{2})", "$1-$2-$3");
198      for (String viewName : this.documentCreationTimeViews) {
199        JCas view = JCasUtil.getView(jCas, viewName, true);
200        DocumentCreationTime docTime = new DocumentCreationTime(view, 1, 1);
201        docTime.setId("t0");
202        docTime.setTimeType("DATE");
203        docTime.setValue(dctValue);
204        docTime.setFunctionInDocument("CREATION_TIME");
205        docTime.addToIndexes();
206      }
207    }
208
209    // add Time annotations
210    addSpans(
211        jCas,
212        TempEval2010CollectionReader.TIMEX_EXTENTS_VIEW_NAME,
213        "timex3",
214        this.timeExtentViews,
215        new AnnotationConstructor<Time>() {
216          @Override
217          public Time apply(JCas aJCas, int begin, int end) {
218            return new Time(aJCas, begin, end);
219          }
220        });
221
222    // add Time attributes
223    addAttributes(
224        jCas,
225        TempEval2010CollectionReader.TIMEX_ATTRIBUTES_VIEW_NAME,
226        Time.class,
227        this.timeAttributeViews,
228        new AttributeSetter<Time>() {
229          @Override
230          public void apply(Time time, String attrName, String attrValue) {
231            if (attrName.equals("type")) {
232              time.setTimeType(attrValue);
233            } else if (attrName.equals("value")) {
234              time.setValue(attrValue);
235            } else {
236              String message = "Unexpected TIMEX attribute %s=%s";
237              throw new IllegalArgumentException(String.format(message, attrName, attrValue));
238            }
239          }
240        });
241
242    // add Event annotations
243    addSpans(
244        jCas,
245        TempEval2010CollectionReader.EVENT_EXTENTS_VIEW_NAME,
246        "event",
247        this.eventExtentViews,
248        new AnnotationConstructor<Event>() {
249          @Override
250          public Event apply(JCas aJCas, int begin, int end) {
251            return new Event(aJCas, begin, end);
252          }
253        });
254
255    // add Event attributes
256    addAttributes(
257        jCas,
258        TempEval2010CollectionReader.EVENT_ATTRIBUTES_VIEW_NAME,
259        Event.class,
260        this.eventAttributeViews,
261        new AttributeSetter<Event>() {
262          @Override
263          public void apply(Event event, String attrName, String attrValue) {
264            if (attrName.equals("pos")) {
265              event.setPos(attrValue);
266            } else if (attrName.equals("tense")) {
267              event.setTense(attrValue);
268            } else if (attrName.equals("aspect")) {
269              event.setAspect(attrValue);
270            } else if (attrName.equals("class")) {
271              event.setEventClass(attrValue);
272            } else if (attrName.equals("polarity")) {
273              event.setPolarity(attrValue);
274            } else if (attrName.equals("modality")) {
275              event.setModality(attrValue);
276            } else {
277              String message = "Unexpected EVENT attribute %s=%s";
278              throw new IllegalArgumentException(String.format(message, attrName, attrValue));
279            }
280          }
281        });
282
283    // add TemporalLink annotations
284    addTemporalLinks(
285        jCas,
286        TempEval2010CollectionReader.TLINK_DCT_EVENT_VIEW_NAME,
287        textBuilders,
288        this.temporalLinkEventToDocumentCreationTimeViews);
289    addTemporalLinks(
290        jCas,
291        TempEval2010CollectionReader.TLINK_TIMEX_EVENT_VIEW_NAME,
292        textBuilders,
293        this.temporalLinkEventToSameSentenceTimeViews);
294    addTemporalLinks(
295        jCas,
296        TempEval2010CollectionReader.TLINK_SUBORDINATED_EVENTS_VIEW_NAME,
297        textBuilders,
298        this.temporalLinkEventToSubordinatedEventViews);
299    addTemporalLinks(
300        jCas,
301        TempEval2010CollectionReader.TLINK_MAIN_EVENTS_VIEW_NAME,
302        textBuilders,
303        this.temporalLinkMainEventToNextSentenceMainEventViews);
304
305    // set the document text
306    for (String viewName : this.textViews) {
307      JCas view = JCasUtil.getView(jCas, viewName, true);
308      view.setDocumentText(textBuilders.get(viewName).toString());
309    }
310  }
311
312  private static String[] split(String line, String... expected) {
313    String[] columns = line.split("\t");
314    if (columns.length != expected.length) {
315      throw new IllegalArgumentException(String.format(
316          "Expected % d items, %s, found %d items, %s",
317          expected.length,
318          Joiner.on('\t').join(expected),
319          columns.length,
320          line));
321    }
322    return columns;
323  }
324
325  private static String[] lines(JCas jCas, String viewName) throws AnalysisEngineProcessException {
326    JCas view;
327    try {
328      view = jCas.getView(viewName);
329    } catch (CASException e) {
330      throw new AnalysisEngineProcessException(e);
331    }
332    String text = view.getDocumentText();
333    if (text == null) {
334      throw new IllegalArgumentException("no text in view " + viewName);
335    }
336    return text.length() > 0 ? text.split("\n") : new String[0];
337  }
338
339  private static interface AnnotationConstructor<T extends Annotation> {
340    public T apply(JCas jCas, int begin, int end);
341  }
342
343  private static <T extends Anchor> void addSpans(
344      JCas jCas,
345      String tabViewName,
346      String elementName,
347      String[] annotationViewNames,
348      AnnotationConstructor<T> constructor) throws AnalysisEngineProcessException {
349    String[] lines = lines(jCas, tabViewName);
350    for (String annotationViewName : annotationViewNames) {
351      JCas view = JCasUtil.getView(jCas, annotationViewName, true);
352      Map<String, T> idMap = new HashMap<String, T>();
353      List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
354      for (Sentence sentence : JCasUtil.select(view, Sentence.class)) {
355        sentenceTokens.add(JCasUtil.selectCovered(view, Token.class, sentence));
356      }
357      for (String line : lines) {
358        String[] columns = split(
359            line,
360            "<filename>",
361            "<sent_no>",
362            "<token_no>",
363            elementName,
364            "<id>",
365            "1");
366        int sentIndex = Integer.parseInt(columns[1]);
367        int tokenIndex = Integer.parseInt(columns[2]);
368        String id = columns[4];
369        Token token = sentenceTokens.get(sentIndex).get(tokenIndex);
370        if (!idMap.containsKey(id)) {
371          T ann = constructor.apply(view, token.getBegin(), token.getEnd());
372          ann.setId(id);
373          ann.addToIndexes();
374          idMap.put(id, ann);
375        } else {
376          T ann = idMap.get(id);
377          if (token.getBegin() < ann.getBegin()) {
378            ann.setBegin(token.getBegin());
379          }
380          if (token.getEnd() > ann.getEnd()) {
381            ann.setEnd(token.getEnd());
382          }
383        }
384      }
385    }
386  }
387
388  private static interface AttributeSetter<T extends Annotation> {
389    public void apply(T ann, String attrName, String attrValue);
390  }
391
392  private static <T extends Anchor> void addAttributes(
393      JCas jCas,
394      String tabViewName,
395      Class<T> cls,
396      String[] annotationViewNames,
397      AttributeSetter<T> setter) throws AnalysisEngineProcessException {
398    String[] lines = lines(jCas, tabViewName);
399    for (String annotationViewName : annotationViewNames) {
400      JCas view = JCasUtil.getView(jCas, annotationViewName, false);
401      Map<String, T> idMap = new HashMap<String, T>();
402      for (T anchor : JCasUtil.select(view, cls)) {
403        idMap.put(anchor.getId(), anchor);
404      }
405      for (String line : lines) {
406        String[] columns = split(
407            line,
408            "<filename>",
409            "<sent_no>",
410            "<token_no>",
411            "timex3",
412            "<id>",
413            "1",
414            "<attribute>",
415            "<value>");
416        String id = columns[4];
417        String attrName = columns[6];
418        String attrValue = columns[7];
419        setter.apply(idMap.get(id), attrName, attrValue);
420      }
421    }
422  }
423
424  private static void addTemporalLinks(
425      JCas jCas,
426      String tabViewName,
427      Map<String, StringBuilder> textBuilders,
428      String[] annotationViewNames) throws AnalysisEngineProcessException {
429    String[] lines = lines(jCas, tabViewName);
430    for (String annotationViewName : annotationViewNames) {
431      JCas view = JCasUtil.getView(jCas, annotationViewName, true);
432      Map<String, Anchor> idAnchors = new HashMap<String, Anchor>();
433      for (Anchor anchor : JCasUtil.select(view, Anchor.class)) {
434        idAnchors.put(anchor.getId(), anchor);
435      }
436      StringBuilder textBuilder = textBuilders.get(annotationViewName);
437      for (String line : lines) {
438        String[] columns = split(line, "<filename>", "<eid>", "<tid>", "<relation>");
439        String sourceID = columns[1];
440        String targetID = columns[2];
441        String relation = columns[3];
442        int offset = textBuilder.length();
443        TemporalLink tlink = new TemporalLink(view, offset, offset);
444        tlink.setSource(idAnchors.get(sourceID));
445        tlink.setTarget(idAnchors.get(targetID));
446        tlink.setRelationType(relation);
447        tlink.addToIndexes();
448        textBuilder.append('\n');
449      }
450    }
451  }
452}