001/** 002 * Copyright (c) 2007-2008, Regents of the University of Colorado 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions are met: 007 * 008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 011 * 012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 022 * POSSIBILITY OF SUCH DAMAGE. 023 */ 024package org.cleartk.corpus.timeml; 025 026import java.io.IOException; 027import java.io.StringReader; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.HashMap; 031import java.util.HashSet; 032import java.util.List; 033import java.util.Map; 034import java.util.Set; 035 036import org.apache.uima.UimaContext; 037import org.apache.uima.analysis_engine.AnalysisEngineDescription; 038import org.apache.uima.analysis_engine.AnalysisEngineProcessException; 039import org.apache.uima.cas.CAS; 040import org.apache.uima.cas.CASException; 041import org.apache.uima.jcas.JCas; 042import org.apache.uima.resource.ResourceInitializationException; 043import org.apache.uima.util.Level; 044import org.cleartk.timeml.type.Anchor; 045import org.cleartk.timeml.type.DocumentCreationTime; 046import org.cleartk.timeml.type.Event; 047import org.cleartk.timeml.type.TemporalLink; 048import org.cleartk.timeml.type.Text; 049import org.cleartk.timeml.type.Time; 050import org.cleartk.token.type.Sentence; 051import org.cleartk.util.ViewUriUtil; 052import org.jdom2.Content; 053import org.jdom2.Document; 054import org.jdom2.Element; 055import org.jdom2.JDOMException; 056import org.jdom2.input.SAXBuilder; 057import org.jdom2.output.XMLOutputter; 058import org.apache.uima.fit.component.JCasAnnotator_ImplBase; 059import org.apache.uima.fit.descriptor.ConfigurationParameter; 060import org.apache.uima.fit.descriptor.SofaCapability; 061import org.apache.uima.fit.factory.AnalysisEngineFactory; 062 063/** 064 * <br> 065 * Copyright (c) 2007-2008, Regents of the University of Colorado <br> 066 * All rights reserved. 067 * 068 * 069 * @author Steven Bethard 070 * 071 */ 072@SofaCapability(inputSofas = { TimeMlGoldAnnotator.TIMEML_VIEW_NAME, CAS.NAME_DEFAULT_SOFA }) 073public class TimeMlGoldAnnotator extends JCasAnnotator_ImplBase { 074 075 public static final String TIMEML_VIEW_NAME = "TimeMLView"; 076 077 public static final String PARAM_LOAD_TLINKS = "loadTlinks"; 078 079 @ConfigurationParameter( 080 name = PARAM_LOAD_TLINKS, 081 mandatory = false, 082 description = "when false indicates that annotation should not be created for TLINKs (though annotations will still be created for TIMEX3s, EVENTs, etc.).", 083 defaultValue = "true") 084 private boolean loadTlinks; 085 086 public static AnalysisEngineDescription getDescription() throws ResourceInitializationException { 087 return AnalysisEngineFactory.createEngineDescription(TimeMlGoldAnnotator.class); 088 } 089 090 public static AnalysisEngineDescription getDescriptionNoTLINKs() 091 throws ResourceInitializationException { 092 return AnalysisEngineFactory.createEngineDescription( 093 TimeMlGoldAnnotator.class, 094 PARAM_LOAD_TLINKS, 095 false); 096 } 097 098 @Override 099 public void initialize(UimaContext context) throws ResourceInitializationException { 100 super.initialize(context); 101 } 102 103 @Override 104 public void process(JCas jCas) throws AnalysisEngineProcessException { 105 JCas timemlView; 106 JCas initialView; 107 try { 108 timemlView = jCas.getView(TIMEML_VIEW_NAME); 109 initialView = jCas.getView(CAS.NAME_DEFAULT_SOFA); 110 } catch (CASException e) { 111 throw new AnalysisEngineProcessException(e); 112 } 113 114 String timeML = timemlView.getDocumentText(); 115 SAXBuilder builder = new SAXBuilder(); 116 builder.setDTDHandler(null); 117 Element root; 118 try { 119 Document doc = builder.build(new StringReader(timeML)); 120 root = doc.getRootElement(); 121 } catch (JDOMException e) { 122 getContext().getLogger().log( 123 Level.SEVERE, 124 "problem parsing document: " + ViewUriUtil.getURI(jCas)); 125 throw new AnalysisEngineProcessException(e); 126 } catch (IOException e) { 127 throw new AnalysisEngineProcessException(e); 128 } 129 130 // collect the document text, add the Event, Time and TemporalLink annotations, 131 // and collect the necessary information to fill in the cross-annotation links 132 StringBuffer textBuffer = new StringBuffer(); 133 Map<String, Anchor> anchors = new HashMap<String, Anchor>(); 134 Map<Time, String> anchorTimeIDs = new HashMap<Time, String>(); 135 List<Element> makeInstances = new ArrayList<Element>(); 136 Map<TemporalLink, String> tlinkSourceIDs = new HashMap<TemporalLink, String>(); 137 Map<TemporalLink, String> tlinkTargetIDs = new HashMap<TemporalLink, String>(); 138 this.addAnnotations( 139 initialView, 140 root, 141 textBuffer, 142 anchors, 143 anchorTimeIDs, 144 makeInstances, 145 tlinkSourceIDs, 146 tlinkTargetIDs); 147 initialView.setDocumentText(textBuffer.toString()); 148 149 // point make-instance IDs to their events, and copy attributes over 150 Set<Event> processedEvents = new HashSet<Event>(); 151 for (Element makeInstance : makeInstances) { 152 String eventID = makeInstance.getAttributeValue("eventID"); 153 String eventInstanceID = makeInstance.getAttributeValue("eiid"); 154 Event event = (Event) this.getAnchor(jCas, anchors, eventID); 155 anchors.put(eventInstanceID, event); 156 if (!processedEvents.contains(event)) { 157 TimeMlUtil.copyAttributes(makeInstance, event, jCas); 158 processedEvents.add(event); 159 } else { 160 String makeInstanceXML = new XMLOutputter().outputString(makeInstance); 161 String message = "Ignoring attributes from additional %s in %s"; 162 String fileName = ViewUriUtil.getURI(jCas).toString(); 163 this.getLogger().warn(String.format(message, makeInstanceXML, fileName)); 164 } 165 } 166 167 // set anchor times 168 for (Time time : anchorTimeIDs.keySet()) { 169 Time anchorTime = (Time) this.getAnchor(jCas, anchors, anchorTimeIDs.get(time)); 170 time.setAnchorTime(anchorTime); 171 } 172 173 // set tlink sources and targets 174 for (TemporalLink tlink : tlinkSourceIDs.keySet()) { 175 tlink.setSource(this.getAnchor(jCas, anchors, tlinkSourceIDs.get(tlink))); 176 tlink.setTarget(this.getAnchor(jCas, anchors, tlinkTargetIDs.get(tlink))); 177 } 178 } 179 180 private void addAnnotations( 181 JCas jCas, 182 Element element, 183 StringBuffer textBuffer, 184 Map<String, Anchor> anchors, 185 Map<Time, String> anchorTimeIDs, 186 List<Element> makeInstances, 187 Map<TemporalLink, String> tlinkSourceIDs, 188 Map<TemporalLink, String> tlinkTargetIDs) throws AnalysisEngineProcessException { 189 int startOffset = textBuffer.length(); 190 for (Content content : element.getContent()) { 191 if (content instanceof org.jdom2.Text) { 192 textBuffer.append(((org.jdom2.Text) content).getText()); 193 } else if (content instanceof Element) { 194 this.addAnnotations( 195 jCas, 196 (Element) content, 197 textBuffer, 198 anchors, 199 anchorTimeIDs, 200 makeInstances, 201 tlinkSourceIDs, 202 tlinkTargetIDs); 203 } 204 } 205 int endOffset = textBuffer.length(); 206 207 String elementName = element.getName().toUpperCase(); 208 if (elementName.equals("TIMEX3")) { 209 String funcInDoc = element.getAttributeValue("functionInDocument"); 210 boolean isCreationTime = funcInDoc != null && funcInDoc.equals("CREATION_TIME"); 211 Time time = isCreationTime 212 ? new DocumentCreationTime(jCas, startOffset, endOffset) 213 : new Time(jCas, startOffset, endOffset); 214 TimeMlUtil.copyAttributes(element, time, jCas); 215 String anchorTimeID = element.getAttributeValue("anchorTimeID"); 216 if (anchorTimeID != null) { 217 anchorTimeIDs.put(time, anchorTimeID); 218 } 219 anchors.put(time.getId(), time); 220 time.addToIndexes(); 221 } else if (elementName.equals("EVENT")) { 222 Event event = new Event(jCas, startOffset, endOffset); 223 TimeMlUtil.copyAttributes(element, event, jCas); 224 anchors.put(event.getId(), event); 225 event.addToIndexes(); 226 } else if (elementName.equals("MAKEINSTANCE")) { 227 makeInstances.add(element); 228 } else if (elementName.equals("TLINK") && this.loadTlinks) { 229 TemporalLink temporalLink = new TemporalLink(jCas, startOffset, endOffset); 230 TimeMlUtil.copyAttributes(element, temporalLink, jCas); 231 String sourceID = this.getOneOf(element, "eventInstanceID", "eventID", "timeID"); 232 String targetID = this.getOneOf( 233 element, 234 "relatedToEventInstance", 235 "relatedToEvent", 236 "relatedToTime"); 237 tlinkSourceIDs.put(temporalLink, sourceID); 238 tlinkTargetIDs.put(temporalLink, targetID); 239 temporalLink.addToIndexes(); 240 } else if (elementName.equals("TEXT")) { 241 Text text = new Text(jCas, startOffset, endOffset); 242 text.addToIndexes(); 243 } else if (elementName.toLowerCase().equals("s")) { 244 Sentence sentence = new Sentence(jCas, startOffset, endOffset); 245 sentence.addToIndexes(); 246 } 247 } 248 249 private String getOneOf(Element element, String... attributeNames) { 250 for (String name : attributeNames) { 251 String result = element.getAttributeValue(name); 252 if (result != null) { 253 return result; 254 } 255 } 256 throw new RuntimeException(String.format( 257 "unable to find in %s any of the following attributes: %s", 258 element, 259 Arrays.asList(attributeNames))); 260 } 261 262 private Anchor getAnchor(JCas jCas, Map<String, Anchor> anchors, String id) 263 throws AnalysisEngineProcessException { 264 Anchor anchor = anchors.get(id); 265 if (anchor == null) { 266 throw new RuntimeException(String.format( 267 "%s: no anchor for id %s", 268 ViewUriUtil.getURI(jCas), 269 id)); 270 } 271 return anchor; 272 } 273 274 public void setLoadTlinks(boolean loadTLINKs) { 275 this.loadTlinks = loadTLINKs; 276 } 277}