/*
 * Copyright (c) 2011, Regents of the University of Colorado
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package org.cleartk.corpus.timeml;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.timeml.type.Anchor;
import org.cleartk.timeml.type.DocumentCreationTime;
import org.cleartk.timeml.type.Event;
import org.cleartk.timeml.type.TemporalLink;
import org.cleartk.timeml.type.Time;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;

import com.google.common.base.Joiner;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;

/**
 * Converts the tab-separated views named by the {@link TempEval2010CollectionReader} constants
 * into {@link Token}, {@link Sentence}, {@link DocumentCreationTime}, {@link Time}, {@link Event}
 * and {@link TemporalLink} annotations. Each kind of annotation is written to the views selected
 * by the corresponding configuration parameter; every parameter defaults to the default sofa.
 *
 * <br>
 * Copyright (c) 2011, Regents of the University of Colorado <br>
 * All rights reserved.
 *
 * @author Steven Bethard
 */
public class TempEval2010GoldAnnotator extends JCasAnnotator_ImplBase {

  /**
   * Creates an engine description for this annotator with no parameters set explicitly.
   *
   * @return the description built by {@link AnalysisEngineFactory}
   * @throws ResourceInitializationException
   *           if the description cannot be created
   */
  public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
    return AnalysisEngineFactory.createEngineDescription(TempEval2010GoldAnnotator.class);
  }

  @ConfigurationParameter(
      name = PARAM_TEXT_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where document text should be placed")
  private String[] textViews;

  @ConfigurationParameter(
      name = PARAM_DOCUMENT_CREATION_TIME_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where DocumentCreationTime annotations should be placed")
  private String[] documentCreationTimeViews;

  @ConfigurationParameter(
      name = PARAM_TIME_EXTENT_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where Time annotations should be placed")
  private String[] timeExtentViews;

  @ConfigurationParameter(
      name = PARAM_TIME_ATTRIBUTE_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where Time annotation attributes should be placed")
  private String[] timeAttributeViews;

  @ConfigurationParameter(
      name = PARAM_EVENT_EXTENT_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where Event annotations should be placed")
  private String[] eventExtentViews;

  @ConfigurationParameter(
      name = PARAM_EVENT_ATTRIBUTE_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where Event annotation attributes should be placed")
  private String[] eventAttributeViews;

  @ConfigurationParameter(
      name = PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description =
          "Views where TemporalLink annotations between events and the document creation time should be placed")
  private String[] temporalLinkEventToDocumentCreationTimeViews;

  @ConfigurationParameter(
      name = PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where TemporalLink annotations between events and times within the same sentence should be placed")
  private String[] temporalLinkEventToSameSentenceTimeViews;

  @ConfigurationParameter(
      name = PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where TemporalLink annotations between events and syntactically dominated events should be placed")
  private String[] temporalLinkEventToSubordinatedEventViews;

  @ConfigurationParameter(
      name = PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEWS,
      mandatory = false,
      defaultValue = CAS.NAME_DEFAULT_SOFA,
      description = "Views where TemporalLink annotations between main events in adjacent sentences should be placed")
  private String[] temporalLinkMainEventToNextSentenceMainEventViews;

  public static final String PARAM_TEXT_VIEWS = "textViews";

  public static final String PARAM_DOCUMENT_CREATION_TIME_VIEWS = "documentCreationTimeViews";

  public static final String PARAM_TIME_EXTENT_VIEWS = "timeExtentViews";

  public static final String PARAM_TIME_ATTRIBUTE_VIEWS = "timeAttributeViews";

  public static final String PARAM_EVENT_EXTENT_VIEWS = "eventExtentViews";

  public static final String PARAM_EVENT_ATTRIBUTE_VIEWS = "eventAttributeViews";

  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_DOCUMENT_CREATION_TIME_VIEWS = "temporalLinkEventToDocumentCreationTimeViews";

  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SAME_SENTENCE_TIME_VIEWS =
      "temporalLinkEventToSameSentenceTimeViews";

  public static final String PARAM_TEMPORAL_LINK_EVENT_TO_SUBORDINATED_EVENT_VIEWS = "temporalLinkEventToSubordinatedEventViews";

  public static final String PARAM_TEMPORAL_LINK_MAIN_EVENT_TO_NEXT_SENTENCE_MAIN_EVENT_VIEWS = "temporalLinkMainEventToNextSentenceMainEventViews";

  /**
   * Builds the document text and all gold annotations from the tab-separated source views.
   *
   * <p>
   * The text of each text view starts with two newlines (the first line is reserved for the
   * document creation time), followed by one line per sentence with tokens separated by single
   * spaces. One extra newline is appended for every temporal link, and each link is anchored at
   * the offset of its own newline. The document text is therefore only set at the very end.
   *
   * @param jCas
   *          the CAS holding the tab-separated source views
   * @throws AnalysisEngineProcessException
   *           if one of the source views cannot be accessed
   * @throws IllegalArgumentException
   *           if a source view has no text, a line has an unexpected number of columns, or an
   *           attribute line names an unknown attribute or annotation id
   */
  @Override
  public void process(JCas jCas) throws AnalysisEngineProcessException {

    // load the sentences and tokens from the view
    ListMultimap<Integer, String> sentTokens = ArrayListMultimap.create();
    for (String line : lines(jCas, TempEval2010CollectionReader.BASE_SEGMENTATION_VIEW_NAME)) {
      String[] columns = split(line, "<filename>", "<sent_no>", "<token_no>", "<text>");
      int sentIndex = Integer.parseInt(columns[1]);
      String text = columns[3];
      sentTokens.put(sentIndex, text);
    }

    // create the sentences and tokens
    Map<String, StringBuilder> textBuilders = new HashMap<String, StringBuilder>();
    for (String viewName : this.textViews) {
      StringBuilder textBuilder = new StringBuilder("\n\n"); // leave line for document time
      JCas view = JCasUtil.getView(jCas, viewName, true);
      // NOTE(review): this loop assumes sentence numbers are exactly 0..n-1 with no gaps;
      // verify against the corpus format
      for (int i = 0; i < sentTokens.keySet().size(); ++i) {
        int sentBegin = textBuilder.length();
        for (String tokenText : sentTokens.get(i)) {
          int tokenBegin = textBuilder.length();
          textBuilder.append(tokenText);
          int tokenEnd = textBuilder.length();
          textBuilder.append(' ');
          Token token = new Token(view, tokenBegin, tokenEnd);
          token.addToIndexes();
        }
        // turn the space after the last token into the sentence-terminating newline
        int sentEnd = textBuilder.length() - 1;
        textBuilder.setCharAt(sentEnd, '\n');
        Sentence sentence = new Sentence(view, sentBegin, sentEnd);
        sentence.addToIndexes();
      }
      textBuilders.put(viewName, textBuilder);
    }

    // add the document creation time
    for (String line : lines(jCas, TempEval2010CollectionReader.DCT_VIEW_NAME)) {
      String[] dctColumns = split(line, "<filename>", "<dct>");
      // rewrite an 8-digit date as three dash-separated groups (4-2-2 digits)
      String dctValue = dctColumns[1].replaceAll("(\\d{4})(\\d{2})(\\d{2})", "$1-$2-$3");
      for (String viewName : this.documentCreationTimeViews) {
        JCas view = JCasUtil.getView(jCas, viewName, true);
        // zero-width annotation at offset 1, i.e. between the two leading newlines
        DocumentCreationTime docTime = new DocumentCreationTime(view, 1, 1);
        docTime.setId("t0");
        docTime.setTimeType("DATE");
        docTime.setValue(dctValue);
        docTime.setFunctionInDocument("CREATION_TIME");
        docTime.addToIndexes();
      }
    }

    // add Time annotations
    addSpans(
        jCas,
        TempEval2010CollectionReader.TIMEX_EXTENTS_VIEW_NAME,
        "timex3",
        this.timeExtentViews,
        new AnnotationConstructor<Time>() {
          @Override
          public Time apply(JCas aJCas, int begin, int end) {
            return new Time(aJCas, begin, end);
          }
        });

    // add Time attributes
    addAttributes(
        jCas,
        TempEval2010CollectionReader.TIMEX_ATTRIBUTES_VIEW_NAME,
        "timex3",
        Time.class,
        this.timeAttributeViews,
        new AttributeSetter<Time>() {
          @Override
          public void apply(Time time, String attrName, String attrValue) {
            if (attrName.equals("type")) {
              time.setTimeType(attrValue);
            } else if (attrName.equals("value")) {
              time.setValue(attrValue);
            } else {
              String message = "Unexpected TIMEX attribute %s=%s";
              throw new IllegalArgumentException(String.format(message, attrName, attrValue));
            }
          }
        });

    // add Event annotations
    addSpans(
        jCas,
        TempEval2010CollectionReader.EVENT_EXTENTS_VIEW_NAME,
        "event",
        this.eventExtentViews,
        new AnnotationConstructor<Event>() {
          @Override
          public Event apply(JCas aJCas, int begin, int end) {
            return new Event(aJCas, begin, end);
          }
        });

    // add Event attributes
    addAttributes(
        jCas,
        TempEval2010CollectionReader.EVENT_ATTRIBUTES_VIEW_NAME,
        "event",
        Event.class,
        this.eventAttributeViews,
        new AttributeSetter<Event>() {
          @Override
          public void apply(Event event, String attrName, String attrValue) {
            if (attrName.equals("pos")) {
              event.setPos(attrValue);
            } else if (attrName.equals("tense")) {
              event.setTense(attrValue);
            } else if (attrName.equals("aspect")) {
              event.setAspect(attrValue);
            } else if (attrName.equals("class")) {
              event.setEventClass(attrValue);
            } else if (attrName.equals("polarity")) {
              event.setPolarity(attrValue);
            } else if (attrName.equals("modality")) {
              event.setModality(attrValue);
            } else {
              String message = "Unexpected EVENT attribute %s=%s";
              throw new IllegalArgumentException(String.format(message, attrName, attrValue));
            }
          }
        });

    // add TemporalLink annotations
    addTemporalLinks(
        jCas,
        TempEval2010CollectionReader.TLINK_DCT_EVENT_VIEW_NAME,
        textBuilders,
        this.temporalLinkEventToDocumentCreationTimeViews);
    addTemporalLinks(
        jCas,
        TempEval2010CollectionReader.TLINK_TIMEX_EVENT_VIEW_NAME,
        textBuilders,
        this.temporalLinkEventToSameSentenceTimeViews);
    addTemporalLinks(
        jCas,
        TempEval2010CollectionReader.TLINK_SUBORDINATED_EVENTS_VIEW_NAME,
        textBuilders,
        this.temporalLinkEventToSubordinatedEventViews);
    addTemporalLinks(
        jCas,
        TempEval2010CollectionReader.TLINK_MAIN_EVENTS_VIEW_NAME,
        textBuilders,
        this.temporalLinkMainEventToNextSentenceMainEventViews);

    // set the document text (last, because addTemporalLinks appends to the builders)
    for (String viewName : this.textViews) {
      JCas view = JCasUtil.getView(jCas, viewName, true);
      view.setDocumentText(textBuilders.get(viewName).toString());
    }
  }

  /**
   * Splits a line on tab characters and checks that it has the expected number of columns.
   *
   * @param line
   *          the tab-separated line
   * @param expected
   *          one label per expected column; only the count is checked, the labels themselves
   *          are used solely in the error message
   * @return the columns of the line
   * @throws IllegalArgumentException
   *           if the number of columns differs from the number of labels
   */
  private static String[] split(String line, String... expected) {
    String[] columns = line.split("\t");
    if (columns.length != expected.length) {
      throw new IllegalArgumentException(String.format(
          "Expected %d items, %s, found %d items, %s",
          expected.length,
          Joiner.on('\t').join(expected),
          columns.length,
          line));
    }
    return columns;
  }

  /**
   * Returns the text of the named view split into lines.
   *
   * @param jCas
   *          the CAS containing the view
   * @param viewName
   *          the name of the view to read
   * @return the newline-separated lines of the view's text, or an empty array for empty text
   * @throws AnalysisEngineProcessException
   *           if the view cannot be retrieved
   * @throws IllegalArgumentException
   *           if the view has no document text
   */
  private static String[] lines(JCas jCas, String viewName) throws AnalysisEngineProcessException {
    JCas view;
    try {
      view = jCas.getView(viewName);
    } catch (CASException e) {
      throw new AnalysisEngineProcessException(e);
    }
    String text = view.getDocumentText();
    if (text == null) {
      throw new IllegalArgumentException("no text in view " + viewName);
    }
    // "".split("\n") would yield a single empty string, so handle empty text explicitly
    return text.length() > 0 ? text.split("\n") : new String[0];
  }

  /**
   * Callback that creates an annotation of a particular type over a span.
   */
  private static interface AnnotationConstructor<T extends Annotation> {
    public T apply(JCas jCas, int begin, int end);
  }

  /**
   * Creates one annotation per distinct id listed in an extents view.
   *
   * <p>
   * Each line identifies a token by sentence and token number. The first token seen for an id
   * creates the annotation; later tokens with the same id widen it so that it covers them too.
   *
   * @param jCas
   *          the CAS holding the tab-separated view
   * @param tabViewName
   *          the name of the tab-separated extents view
   * @param elementName
   *          the label for the element column, used only in error messages
   * @param annotationViewNames
   *          the views in which to create the annotations; they must already hold the
   *          Sentence and Token annotations that the lines refer to
   * @param constructor
   *          creates the annotation for a given span
   * @throws AnalysisEngineProcessException
   *           if the tab-separated view cannot be accessed
   */
  private static <T extends Anchor> void addSpans(
      JCas jCas,
      String tabViewName,
      String elementName,
      String[] annotationViewNames,
      AnnotationConstructor<T> constructor) throws AnalysisEngineProcessException {
    String[] lines = lines(jCas, tabViewName);
    for (String annotationViewName : annotationViewNames) {
      JCas view = JCasUtil.getView(jCas, annotationViewName, true);
      Map<String, T> idMap = new HashMap<String, T>();
      List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
      for (Sentence sentence : JCasUtil.select(view, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(view, Token.class, sentence));
      }
      for (String line : lines) {
        String[] columns = split(
            line,
            "<filename>",
            "<sent_no>",
            "<token_no>",
            elementName,
            "<id>",
            "1");
        int sentIndex = Integer.parseInt(columns[1]);
        int tokenIndex = Integer.parseInt(columns[2]);
        String id = columns[4];
        Token token = sentenceTokens.get(sentIndex).get(tokenIndex);
        T ann = idMap.get(id);
        if (ann == null) {
          ann = constructor.apply(view, token.getBegin(), token.getEnd());
          ann.setId(id);
          ann.addToIndexes();
          idMap.put(id, ann);
        } else {
          int begin = Math.min(ann.getBegin(), token.getBegin());
          int end = Math.max(ann.getEnd(), token.getEnd());
          if (begin != ann.getBegin() || end != ann.getEnd()) {
            // begin/end are index keys, so the annotation must not be in the indexes
            // while they are being modified
            ann.removeFromIndexes();
            ann.setBegin(begin);
            ann.setEnd(end);
            ann.addToIndexes();
          }
        }
      }
    }
  }

  /**
   * Callback that stores a single named attribute value on an annotation.
   */
  private static interface AttributeSetter<T extends Annotation> {
    public void apply(T ann, String attrName, String attrValue);
  }

  /**
   * Sets attributes on existing annotations from an attributes view.
   *
   * @param jCas
   *          the CAS holding the tab-separated view
   * @param tabViewName
   *          the name of the tab-separated attributes view
   * @param elementName
   *          the label for the element column, used only in error messages
   * @param cls
   *          the annotation type whose instances are looked up by id
   * @param annotationViewNames
   *          the views whose annotations should receive the attributes
   * @param setter
   *          stores one attribute value on an annotation
   * @throws AnalysisEngineProcessException
   *           if the tab-separated view cannot be accessed
   * @throws IllegalArgumentException
   *           if a line refers to an id for which the view has no annotation of type
   *           {@code cls}
   */
  private static <T extends Anchor> void addAttributes(
      JCas jCas,
      String tabViewName,
      String elementName,
      Class<T> cls,
      String[] annotationViewNames,
      AttributeSetter<T> setter) throws AnalysisEngineProcessException {
    String[] lines = lines(jCas, tabViewName);
    for (String annotationViewName : annotationViewNames) {
      JCas view = JCasUtil.getView(jCas, annotationViewName, false);
      Map<String, T> idMap = new HashMap<String, T>();
      for (T anchor : JCasUtil.select(view, cls)) {
        idMap.put(anchor.getId(), anchor);
      }
      for (String line : lines) {
        String[] columns = split(
            line,
            "<filename>",
            "<sent_no>",
            "<token_no>",
            elementName,
            "<id>",
            "1",
            "<attribute>",
            "<value>");
        String id = columns[4];
        String attrName = columns[6];
        String attrValue = columns[7];
        T anchor = idMap.get(id);
        if (anchor == null) {
          // fail with a descriptive message rather than a NullPointerException in the setter
          throw new IllegalArgumentException(String.format(
              "No %s annotation with id %s in view %s",
              cls.getSimpleName(),
              id,
              annotationViewName));
        }
        setter.apply(anchor, attrName, attrValue);
      }
    }
  }

  /**
   * Creates a TemporalLink for each line of a temporal link view.
   *
   * <p>
   * Every link is a zero-width annotation placed at the current end of the view's text, and a
   * newline is then appended to that text so that each link gets its own offset.
   *
   * @param jCas
   *          the CAS holding the tab-separated view
   * @param tabViewName
   *          the name of the tab-separated temporal link view
   * @param textBuilders
   *          the text under construction for each text view, keyed by view name
   * @param annotationViewNames
   *          the views in which to create the links
   * @throws AnalysisEngineProcessException
   *           if the tab-separated view cannot be accessed
   * @throws IllegalArgumentException
   *           if there are links to add to a view that has no entry in {@code textBuilders}
   */
  private static void addTemporalLinks(
      JCas jCas,
      String tabViewName,
      Map<String, StringBuilder> textBuilders,
      String[] annotationViewNames) throws AnalysisEngineProcessException {
    String[] lines = lines(jCas, tabViewName);
    for (String annotationViewName : annotationViewNames) {
      JCas view = JCasUtil.getView(jCas, annotationViewName, true);
      Map<String, Anchor> idAnchors = new HashMap<String, Anchor>();
      for (Anchor anchor : JCasUtil.select(view, Anchor.class)) {
        idAnchors.put(anchor.getId(), anchor);
      }
      StringBuilder textBuilder = textBuilders.get(annotationViewName);
      if (textBuilder == null && lines.length > 0) {
        // fail with a descriptive message rather than a NullPointerException below
        throw new IllegalArgumentException(String.format(
            "Cannot add temporal links to view %s because no text was built for it",
            annotationViewName));
      }
      for (String line : lines) {
        String[] columns = split(line, "<filename>", "<eid>", "<tid>", "<relation>");
        String sourceID = columns[1];
        String targetID = columns[2];
        String relation = columns[3];
        int offset = textBuilder.length();
        TemporalLink tlink = new TemporalLink(view, offset, offset);
        // NOTE(review): an id with no matching anchor leaves source/target null, as before
        tlink.setSource(idAnchors.get(sourceID));
        tlink.setTarget(idAnchors.get(targetID));
        tlink.setRelationType(relation);
        tlink.addToIndexes();
        textBuilder.append('\n');
      }
    }
  }
}