001/** 
002 * Copyright (c) 2007-2008, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.corpus.propbank.util;
025
026import java.util.ArrayList;
027import java.util.Arrays;
028import java.util.HashSet;
029import java.util.List;
030import java.util.Set;
031
032import org.apache.uima.jcas.JCas;
033import org.apache.uima.jcas.cas.FSArray;
034import org.apache.uima.jcas.tcas.Annotation;
035import org.cleartk.srl.type.SemanticArgument;
036import org.cleartk.syntax.constituent.type.TopTreebankNode;
037import org.cleartk.util.AnnotationUtil;
038import org.apache.uima.fit.util.FSCollectionFactory;
039
040import com.google.common.annotations.Beta;
041
042/**
043 * <br>
044 * Copyright (c) 2007-2008, Regents of the University of Colorado <br>
045 * All rights reserved.
046 * 
047 * 
048 * <p>
049 * A <em>Proplabel object</em> represents one label of an entry in Propbank.
050 * </p>
051 * 
052 * @author Philipp Wetzler, Steven Bethard
053 */
054@Beta
055public class Proplabel {
056  /**
057   * Parses one label taken form a Propbank entry and returns its representation as a
058   * <em>Proplabel</em> object.
059   * 
060   * @param lblTxt
061   *          one label part of one line from <tt>prop.txt</tt>
062   * 
063   * @return a <em>Proplabel</em> object representing <b>lblTxt</b>
064   */
065  static Proplabel fromString(String lblTxt) {
066    // split the string by hyphens and catch some simple errors
067    String[] columns = lblTxt.split("-");
068    if (columns.length < 1) {
069      throw new PropbankFormatException(String.format("Missing label: %s", lblTxt));
070    }
071    if (!Proplabel.labels.contains(columns[1])) {
072      throw new PropbankFormatException(String.format("Invalid label: %s", columns[1]));
073    }
074
075    // set the relation and label
076    Proplabel proplabel = new Proplabel();
077    proplabel.setPropTxt(lblTxt);
078    proplabel.setRelation(PropbankRelation.fromString(columns[0]));
079    proplabel.setLabel(columns[1]);
080
081    // second column may be feature, hyphen tag or preposition
082    // third column may only be hyphen tag following feature
083    int expectedLength = 2;
084    if (columns.length > 2) {
085      if (Proplabel.features.contains(columns[2])) {
086        proplabel.setFeature(columns[2]);
087        if (columns.length > 3) {
088          if (Proplabel.hyphenTags.contains(columns[3])) {
089            proplabel.setHyphenTag(columns[3]);
090          }
091          expectedLength = 4;
092        } else {
093          expectedLength = 3;
094        }
095      } else if (Proplabel.hyphenTags.contains(columns[2])) {
096        proplabel.setHyphenTag(columns[2]);
097        expectedLength = 3;
098      } else {
099        proplabel.setPreposition(columns[2]);
100        expectedLength = 3;
101      }
102    }
103
104    // throw some exceptions for bad input
105    if (columns.length != expectedLength) {
106      throw new PropbankFormatException(String.format(
107          "Expected %d items, found %d",
108          expectedLength,
109          columns.length));
110    }
111    if (Proplabel.labelsRequiringFeatures.contains(columns[1])) {
112      if (proplabel.getFeature() == null) {
113        throw new PropbankFormatException(String.format(
114            "Label %s requires a feature",
115            proplabel.getLabel()));
116      }
117    }
118    return proplabel;
119  }
120
121  protected PropbankRelation relation;
122
123  protected String label;
124
125  protected String feature;
126
127  protected String preposition;
128
129  protected String hyphenTag;
130
131  protected String propTxt;
132
133  protected Proplabel() {
134    relation = null;
135    label = null;
136    feature = null;
137    preposition = null;
138  }
139
140  public String getFeature() {
141    return feature;
142  }
143
144  public void setFeature(String feature) {
145    this.feature = feature;
146  }
147
148  public String getLabel() {
149    return label;
150  }
151
152  public void setLabel(String label) {
153    this.label = label;
154  }
155
156  public String getPreposition() {
157    return preposition;
158  }
159
160  public void setPreposition(String preposition) {
161    this.preposition = preposition;
162  }
163
164  public String getHyphenTag() {
165    return hyphenTag;
166  }
167
168  public void setHyphenTag(String hyphenTag) {
169    this.hyphenTag = hyphenTag;
170  }
171
172  public PropbankRelation getRelation() {
173    return relation;
174  }
175
176  public void setRelation(PropbankRelation relation) {
177    this.relation = relation;
178  }
179
180  public String getPropTxt() {
181    return propTxt;
182  }
183
184  public void setPropTxt(String propTxt) {
185    this.propTxt = propTxt;
186  }
187
188  /**
189   * Convert to ClearTK <em>SemanticArgument</em> annotation and add it to <b>view</b>.
190   * 
191   * @param view
192   *          the view where the annotation will be added
193   * @param topNode
194   *          the top node annotation of the corresponding Treebank parse
195   * @return the generated <em>SemanticArgument</em> annotation
196   */
197  public SemanticArgument convert(JCas view, TopTreebankNode topNode) {
198    SemanticArgument argument = new SemanticArgument(view);
199    argument.setPropTxt(this.propTxt);
200    argument.setLabel(this.label);
201    argument.setFeature(this.feature);
202    argument.setPreposition(this.preposition);
203    argument.setHyphenTag(this.hyphenTag);
204    if (this.relation instanceof PropbankCorefRelation) {
205      List<Annotation> annotations = new ArrayList<Annotation>();
206      List<Annotation> substantiveAnnotations = new ArrayList<Annotation>();
207
208      for (PropbankRelation rel : ((PropbankCorefRelation) this.relation).getCorefRelations()) {
209        Annotation a = rel.convert(view, topNode);
210        annotations.add(a);
211        if (a.getBegin() != a.getEnd()) {
212          substantiveAnnotations.add(a);
213        }
214      }
215      argument.setCoreferenceAnnotations(new FSArray(view, annotations.size()));
216      FSCollectionFactory.fillArrayFS(argument.getCoreferenceAnnotations(), annotations);
217
218      if (substantiveAnnotations.isEmpty()) {
219        Annotation lastAnnotation = annotations.get(annotations.size() - 1);
220        argument.setBegin(lastAnnotation.getBegin());
221        argument.setEnd(lastAnnotation.getEnd());
222      } else {
223        int[] extent = AnnotationUtil.getAnnotationsExtent(substantiveAnnotations);
224        argument.setBegin(extent[0]);
225        argument.setEnd(extent[1]);
226      }
227
228      if (substantiveAnnotations.size() == 1) {
229        argument.setAnnotation(substantiveAnnotations.get(0));
230      }
231    } else {
232      argument.setAnnotation(this.relation.convert(view, topNode));
233      argument.setBegin(argument.getAnnotation().getBegin());
234      argument.setEnd(argument.getAnnotation().getEnd());
235    }
236    argument.addToIndexes();
237
238    return argument;
239  }
240
241  /**
242   * Re-generate the Propbank text that this object was parsed from.
243   */
244  @Override
245  public String toString() {
246    StringBuffer buffer = new StringBuffer();
247
248    buffer.append(getRelation().toString());
249    buffer.append("-" + getLabel());
250    if (getFeature() != null)
251      buffer.append("-" + getFeature());
252    if (getHyphenTag() != null)
253      buffer.append("-" + getHyphenTag());
254    if (getPreposition() != null)
255      buffer.append("-" + getPreposition());
256
257    return buffer.toString();
258  }
259
260  private static final Set<String> labels = new HashSet<String>(
261      Arrays.asList("rel|Support|ARG0|ARG1|ARG2|ARG3|ARG4|ARG5|ARGA|ARGM".split("\\|")));
262
263  private static final Set<String> labelsRequiringFeatures = new HashSet<String>(
264      Arrays.asList(new String[] { "ARGM" }));
265
266  private static final Set<String> features = new HashSet<String>(
267      Arrays.asList("ADV|CAU|DIR|DIS|EXT|LOC|MNR|MOD|NEG|PNC|PRD|REC|TMP".split("\\|")));
268
269  private static final Set<String> hyphenTags = new HashSet<String>(
270      Arrays.asList("H0|H1|H2|H3|H4|H5|H6|H7|H8|H9|XX".split("\\|")));
271
272}