/*
 * Copyright (c) 2011, Regents of the University of Colorado
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package org.cleartk.timeml.tlink;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.feature.extractor.CleartkExtractor;
import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
import org.cleartk.ml.feature.extractor.FeatureExtractor2;
import org.cleartk.ml.feature.extractor.TypePathExtractor;
import org.cleartk.ml.feature.extractor.CleartkExtractor.Bag;
import org.cleartk.ml.feature.extractor.CleartkExtractor.Covered;
import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
import org.cleartk.syntax.constituent.type.TreebankNode;
import org.cleartk.syntax.constituent.type.TreebankNodeUtil;
import org.cleartk.timeml.type.Anchor;
import org.cleartk.timeml.type.Event;
import org.cleartk.timeml.util.CleartkInternalModelFactory;
import org.cleartk.feature.syntax.SyntacticFirstChildOfGrandparentOfLeafExtractor;
import org.cleartk.feature.syntax.SyntacticLeafToLeafPathPartsExtractor;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;

import com.google.common.collect.Lists;

/**
 * Temporal link annotator whose candidate pairs are two distinct {@link Event}s in the same
 * {@link Sentence} for which the syntactic path between their treebank leaves matches
 * {@link #SUBORDINATE_PATH_PATTERN}.
 *
 * <br>
 * Copyright (c) 2011, Regents of the University of Colorado <br>
 * All rights reserved.
 *
 * @author Steven Bethard
 */
public class TemporalLinkEventToSubordinatedEventAnnotator extends
    TemporalLinkAnnotator_ImplBase<Event, Event> {

  /**
   * Model factory for this annotator: reports this class as the annotator class,
   * {@link LibLinearStringOutcomeDataWriter} as the data writer class, and builds the base engine
   * description from this class with no extra configuration parameters.
   */
  public static final CleartkInternalModelFactory FACTORY = new CleartkInternalModelFactory() {
    @Override
    public Class<?> getAnnotatorClass() {
      return TemporalLinkEventToSubordinatedEventAnnotator.class;
    }

    @Override
    public Class<?> getDataWriterClass() {
      return LibLinearStringOutcomeDataWriter.class;
    }

    @Override
    public AnalysisEngineDescription getBaseDescription() throws ResourceInitializationException {
      return AnalysisEngineFactory.createEngineDescription(
          TemporalLinkEventToSubordinatedEventAnnotator.class);
    }
  };

  /**
   * Shape a leaf-stripped source-to-target tree path must have for the target to be accepted as
   * a candidate. Read left to right the pattern allows: an optional single {@code VP>},
   * {@code ADJP>} or {@code NP>} step; then one of {@code VP}, {@code ADJP}, {@code S},
   * {@code SBAR}; then any number of {@code <S}, {@code <SBAR} or {@code <PP} steps; and finally
   * either a run of {@code <VP}/{@code <ADJP} steps or a run of {@code <NP} steps.
   *
   * NOTE(review): presumably {@code >} means "up to parent" and {@code <} means "down to child"
   * in the string produced by TreebankNodeUtil.getPath - confirm against that utility.
   */
  private static final Pattern SUBORDINATE_PATH_PATTERN = Pattern.compile("^(VP>|ADJP>|NP>)?(VP|ADJP|S|SBAR)(<(S|SBAR|PP))*((<VP|<ADJP)*|(<NP)*)$");

  /**
   * Creates the annotator with {@link Event} as both source and target type and the strings
   * {@code "BEFORE"} and {@code "AFTER"} handed to the base class.
   *
   * NOTE(review): the two strings are presumably the relation labels the base class trains on;
   * the base class is not visible here - confirm.
   */
  public TemporalLinkEventToSubordinatedEventAnnotator() {
    super(Event.class, Event.class, "BEFORE", "AFTER");
  }

  /**
   * Initializes the base annotator and then registers the feature extractors.
   *
   * <p>
   * Source and target events share one extractor list: the event's {@code tense},
   * {@code aspect} and {@code eventClass} type-path features plus a
   * {@link SyntacticFirstChildOfGrandparentOfLeafExtractor}. The "between" extractors are a
   * {@link SyntacticLeafToLeafPathPartsExtractor} and a bag of the covered text of the
   * {@link Token}s selected by {@link Covered}.
   *
   * @param context
   *          the UIMA context, forwarded to the superclass
   * @throws ResourceInitializationException
   *           declared by the overridden method; propagated from {@code super.initialize}
   */
  @Override
  public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    List<FeatureExtractor1<Event>> extractors = Lists.newArrayList();
    extractors.add(new TypePathExtractor<Event>(Event.class, "tense"));
    extractors.add(new TypePathExtractor<Event>(Event.class, "aspect"));
    extractors.add(new TypePathExtractor<Event>(Event.class, "eventClass"));
    extractors.add(new SyntacticFirstChildOfGrandparentOfLeafExtractor<Event>());

    // the very same list instance is used for both sides of the link
    this.setSourceExtractors(extractors);
    this.setTargetExtractors(extractors);

    List<FeatureExtractor2<Anchor, Anchor>> betweenExtractors = Lists.newArrayList();
    betweenExtractors.add(new SyntacticLeafToLeafPathPartsExtractor<Anchor, Anchor>());
    betweenExtractors.add(new CleartkExtractor<Anchor, Token>(
        Token.class,
        new CoveredTextExtractor<Token>(),
        new Bag(new Covered())));
    this.setBetweenExtractors(betweenExtractors);
  }

  /**
   * Collects the candidate (source, target) event pairs of a document.
   *
   * <p>
   * For every sentence, each event covered by the sentence is taken as a source and paired with
   * every other event of that sentence accepted by
   * {@link #getSubordinateEvents(JCas, Event, List)}. Pairs are emitted in sentence order, then
   * source order, then target order.
   *
   * @param jCas
   *          the CAS to read sentences and events from
   * @return the list of candidate pairs; empty when nothing qualifies
   */
  @Override
  protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
    List<SourceTargetPair> pairs = Lists.newArrayList();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
      // Select the sentence's events once and reuse the list for both the source loop and the
      // target scan, instead of re-querying the index for every source event.
      List<Event> sentenceEvents = JCasUtil.selectCovered(jCas, Event.class, sentence);
      for (Event source : sentenceEvents) {
        for (Event target : this.getSubordinateEvents(jCas, source, sentenceEvents)) {
          pairs.add(new SourceTargetPair(source, target));
        }
      }
    }
    return pairs;
  }

  /**
   * Finds the events whose tree path from {@code source} matches
   * {@link #SUBORDINATE_PATH_PATTERN}.
   *
   * @param jCas
   *          the CAS used to look up the treebank leaf of each event
   * @param source
   *          the event the paths start from; it is never returned as its own target
   * @param sentenceEvents
   *          the events of the sentence containing {@code source}, in the order candidates
   *          should be reported
   * @return the matching events in {@code sentenceEvents} order; empty when {@code source} has
   *         no matching treebank leaf or no path matches
   */
  private List<Event> getSubordinateEvents(JCas jCas, Event source, List<Event> sentenceEvents) {
    List<Event> targets = new ArrayList<Event>();
    TreebankNode sourceNode = TreebankNodeUtil.selectMatchingLeaf(jCas, source);
    if (sourceNode == null) {
      // without a source leaf no path can be built, so skip the per-target leaf lookups
      return targets;
    }
    for (Event target : sentenceEvents) {
      if (target.equals(source)) {
        continue;
      }
      TreebankNode targetNode = TreebankNodeUtil.selectMatchingLeaf(jCas, target);
      if (targetNode == null) {
        continue;
      }
      // noLeavesPath is not declared in this file; presumably inherited from the base class
      String path = noLeavesPath(TreebankNodeUtil.getPath(sourceNode, targetNode));
      if (SUBORDINATE_PATH_PATTERN.matcher(path).matches()) {
        targets.add(target);
      }
    }
    return targets;
  }
}