001/** 002 * Copyright (c) 2007-2008, Regents of the University of Colorado 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions are met: 007 * 008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 011 * 012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 022 * POSSIBILITY OF SUCH DAMAGE. 023 */ 024package org.cleartk.corpus.propbank.util; 025 026import java.util.ArrayList; 027import java.util.Arrays; 028import java.util.HashSet; 029import java.util.List; 030import java.util.Set; 031 032import org.apache.uima.jcas.JCas; 033import org.apache.uima.jcas.cas.FSArray; 034import org.apache.uima.jcas.tcas.Annotation; 035import org.cleartk.srl.type.SemanticArgument; 036import org.cleartk.syntax.constituent.type.TopTreebankNode; 037import org.cleartk.util.AnnotationUtil; 038import org.apache.uima.fit.util.FSCollectionFactory; 039 040import com.google.common.annotations.Beta; 041 042/** 043 * <br> 044 * Copyright (c) 2007-2008, Regents of the University of Colorado <br> 045 * All rights reserved. 046 * 047 * 048 * <p> 049 * A <em>Proplabel object</em> represents one label of an entry in Propbank. 050 * </p> 051 * 052 * @author Philipp Wetzler, Steven Bethard 053 */ 054@Beta 055public class Proplabel { 056 /** 057 * Parses one label taken form a Propbank entry and returns its representation as a 058 * <em>Proplabel</em> object. 059 * 060 * @param lblTxt 061 * one label part of one line from <tt>prop.txt</tt> 062 * 063 * @return a <em>Proplabel</em> object representing <b>lblTxt</b> 064 */ 065 static Proplabel fromString(String lblTxt) { 066 // split the string by hyphens and catch some simple errors 067 String[] columns = lblTxt.split("-"); 068 if (columns.length < 1) { 069 throw new PropbankFormatException(String.format("Missing label: %s", lblTxt)); 070 } 071 if (!Proplabel.labels.contains(columns[1])) { 072 throw new PropbankFormatException(String.format("Invalid label: %s", columns[1])); 073 } 074 075 // set the relation and label 076 Proplabel proplabel = new Proplabel(); 077 proplabel.setPropTxt(lblTxt); 078 proplabel.setRelation(PropbankRelation.fromString(columns[0])); 079 proplabel.setLabel(columns[1]); 080 081 // second column may be feature, hyphen tag or preposition 082 // third column may only be hyphen tag following feature 083 int expectedLength = 2; 084 if (columns.length > 2) { 085 if (Proplabel.features.contains(columns[2])) { 086 proplabel.setFeature(columns[2]); 087 if (columns.length > 3) { 088 if (Proplabel.hyphenTags.contains(columns[3])) { 089 proplabel.setHyphenTag(columns[3]); 090 } 091 expectedLength = 4; 092 } else { 093 expectedLength = 3; 094 } 095 } else if (Proplabel.hyphenTags.contains(columns[2])) { 096 proplabel.setHyphenTag(columns[2]); 097 expectedLength = 3; 098 } else { 099 proplabel.setPreposition(columns[2]); 100 expectedLength = 3; 101 } 102 } 103 104 // throw some exceptions for bad input 105 if (columns.length != expectedLength) { 106 throw new PropbankFormatException(String.format( 107 "Expected %d items, found %d", 108 expectedLength, 109 columns.length)); 110 } 111 if (Proplabel.labelsRequiringFeatures.contains(columns[1])) { 112 if (proplabel.getFeature() == null) { 113 throw new PropbankFormatException(String.format( 114 "Label %s requires a feature", 115 proplabel.getLabel())); 116 } 117 } 118 return proplabel; 119 } 120 121 protected PropbankRelation relation; 122 123 protected String label; 124 125 protected String feature; 126 127 protected String preposition; 128 129 protected String hyphenTag; 130 131 protected String propTxt; 132 133 protected Proplabel() { 134 relation = null; 135 label = null; 136 feature = null; 137 preposition = null; 138 } 139 140 public String getFeature() { 141 return feature; 142 } 143 144 public void setFeature(String feature) { 145 this.feature = feature; 146 } 147 148 public String getLabel() { 149 return label; 150 } 151 152 public void setLabel(String label) { 153 this.label = label; 154 } 155 156 public String getPreposition() { 157 return preposition; 158 } 159 160 public void setPreposition(String preposition) { 161 this.preposition = preposition; 162 } 163 164 public String getHyphenTag() { 165 return hyphenTag; 166 } 167 168 public void setHyphenTag(String hyphenTag) { 169 this.hyphenTag = hyphenTag; 170 } 171 172 public PropbankRelation getRelation() { 173 return relation; 174 } 175 176 public void setRelation(PropbankRelation relation) { 177 this.relation = relation; 178 } 179 180 public String getPropTxt() { 181 return propTxt; 182 } 183 184 public void setPropTxt(String propTxt) { 185 this.propTxt = propTxt; 186 } 187 188 /** 189 * Convert to ClearTK <em>SemanticArgument</em> annotation and add it to <b>view</b>. 190 * 191 * @param view 192 * the view where the annotation will be added 193 * @param topNode 194 * the top node annotation of the corresponding Treebank parse 195 * @return the generated <em>SemanticArgument</em> annotation 196 */ 197 public SemanticArgument convert(JCas view, TopTreebankNode topNode) { 198 SemanticArgument argument = new SemanticArgument(view); 199 argument.setPropTxt(this.propTxt); 200 argument.setLabel(this.label); 201 argument.setFeature(this.feature); 202 argument.setPreposition(this.preposition); 203 argument.setHyphenTag(this.hyphenTag); 204 if (this.relation instanceof PropbankCorefRelation) { 205 List<Annotation> annotations = new ArrayList<Annotation>(); 206 List<Annotation> substantiveAnnotations = new ArrayList<Annotation>(); 207 208 for (PropbankRelation rel : ((PropbankCorefRelation) this.relation).getCorefRelations()) { 209 Annotation a = rel.convert(view, topNode); 210 annotations.add(a); 211 if (a.getBegin() != a.getEnd()) { 212 substantiveAnnotations.add(a); 213 } 214 } 215 argument.setCoreferenceAnnotations(new FSArray(view, annotations.size())); 216 FSCollectionFactory.fillArrayFS(argument.getCoreferenceAnnotations(), annotations); 217 218 if (substantiveAnnotations.isEmpty()) { 219 Annotation lastAnnotation = annotations.get(annotations.size() - 1); 220 argument.setBegin(lastAnnotation.getBegin()); 221 argument.setEnd(lastAnnotation.getEnd()); 222 } else { 223 int[] extent = AnnotationUtil.getAnnotationsExtent(substantiveAnnotations); 224 argument.setBegin(extent[0]); 225 argument.setEnd(extent[1]); 226 } 227 228 if (substantiveAnnotations.size() == 1) { 229 argument.setAnnotation(substantiveAnnotations.get(0)); 230 } 231 } else { 232 argument.setAnnotation(this.relation.convert(view, topNode)); 233 argument.setBegin(argument.getAnnotation().getBegin()); 234 argument.setEnd(argument.getAnnotation().getEnd()); 235 } 236 argument.addToIndexes(); 237 238 return argument; 239 } 240 241 /** 242 * Re-generate the Propbank text that this object was parsed from. 243 */ 244 @Override 245 public String toString() { 246 StringBuffer buffer = new StringBuffer(); 247 248 buffer.append(getRelation().toString()); 249 buffer.append("-" + getLabel()); 250 if (getFeature() != null) 251 buffer.append("-" + getFeature()); 252 if (getHyphenTag() != null) 253 buffer.append("-" + getHyphenTag()); 254 if (getPreposition() != null) 255 buffer.append("-" + getPreposition()); 256 257 return buffer.toString(); 258 } 259 260 private static final Set<String> labels = new HashSet<String>( 261 Arrays.asList("rel|Support|ARG0|ARG1|ARG2|ARG3|ARG4|ARG5|ARGA|ARGM".split("\\|"))); 262 263 private static final Set<String> labelsRequiringFeatures = new HashSet<String>( 264 Arrays.asList(new String[] { "ARGM" })); 265 266 private static final Set<String> features = new HashSet<String>( 267 Arrays.asList("ADV|CAU|DIR|DIS|EXT|LOC|MNR|MOD|NEG|PNC|PRD|REC|TMP".split("\\|"))); 268 269 private static final Set<String> hyphenTags = new HashSet<String>( 270 Arrays.asList("H0|H1|H2|H3|H4|H5|H6|H7|H8|H9|XX".split("\\|"))); 271 272}