001/** 
002 * Copyright (c) 2007-2008, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024package org.cleartk.util;
025
026import java.util.ArrayList;
027import java.util.Collections;
028import java.util.Comparator;
029import java.util.List;
030
031import org.apache.uima.jcas.JCas;
032import org.apache.uima.jcas.tcas.Annotation;
033import org.apache.uima.fit.util.JCasUtil;
034
035/**
036 * <br>
037 * Copyright (c) 2007-2008, Regents of the University of Colorado <br>
038 * All rights reserved.
039 * 
040 * 
041 * @author Philip Ogren
042 * 
043 */
044public class AnnotationUtil {
045
046  public static <T extends Annotation> T selectFirstMatching(
047      JCas jCas,
048      Class<T> selectedType,
049      Annotation annotation) {
050    List<T> selected = selectMatching(jCas, selectedType, annotation);
051    return selected.size() > 0 ? selected.get(0) : null;
052  }
053
054  public static <T extends Annotation> List<T> selectMatching(
055      JCas jCas,
056      Class<T> selectedType,
057      Annotation annotation) {
058    List<T> result = new ArrayList<T>();
059    for (T selected : JCasUtil.selectCovered(jCas, selectedType, annotation)) {
060      if (selected.getBegin() == annotation.getBegin() && selected.getEnd() == annotation.getEnd()) {
061        result.add(selected);
062      }
063    }
064    return result;
065  }
066
067  public static boolean contains(Annotation bigAnnotation, Annotation smallAnnotation) {
068    if (bigAnnotation == null || smallAnnotation == null)
069      return false;
070    if (bigAnnotation.getBegin() <= smallAnnotation.getBegin()
071        && bigAnnotation.getEnd() >= smallAnnotation.getEnd())
072      return true;
073    else
074      return false;
075  }
076
077  public static boolean overlaps(Annotation annotation1, Annotation annotation2) {
078    Annotation firstAnnotation, secondAnnotation;
079
080    if (annotation1.getBegin() == annotation2.getBegin())
081      return true;
082
083    if (annotation1.getBegin() < annotation2.getBegin()) {
084      firstAnnotation = annotation1;
085      secondAnnotation = annotation2;
086    } else {
087      firstAnnotation = annotation2;
088      secondAnnotation = annotation1;
089    }
090
091    if (firstAnnotation.getEnd() > secondAnnotation.getBegin())
092      return true;
093    return false;
094
095  }
096
097  public static int size(Annotation annotation) {
098    return annotation.getEnd() - annotation.getBegin();
099  }
100
101  /**
102   * This method provides a way of getting some text before or after an annotation specified by some
103   * "window" of "tokens". For example, you could retrieve the text corresponding to 5 tokens to the
104   * right of a named entity. This may be useful for e.g. creating a report that shows the output of
105   * an annotator such that surrounding text is included.
106   * 
107   * @param annotation
108   *          an annotation to get surrounding/nearby text from. This can be any kind of annotation.
109   * @param tokenClass
110   *          the kind of "tokens" to use. This could be any kind of annotation type as well.
111   * @param numberOfTokens
112   *          the number of tokens to consider when creating a span of text
113   * @param before
114   *          determines whether to return text occuring before the annotation or after
115   * @return a span of text that occurs before or after the annotation which will either end at the
116   *         beginning of the annotation or begin at the end of the annotation. The other edge of
117   *         the span is determined by the start/end location of the "token" found on either side of
118   *         the annotation. If such a "token" does not exist then the other edge of the span will
119   *         be either end of the document text.
120   */
121  public static <TOKEN_TYPE extends Annotation> String getSurroundingText(
122      JCas jCas,
123      Annotation annotation,
124      Class<TOKEN_TYPE> tokenClass,
125      int numberOfTokens,
126      boolean before) {
127
128    if (numberOfTokens < 1)
129      throw new IllegalArgumentException(
130          "numberOfTokens must be greater than zero.  Actual values is: " + numberOfTokens);
131
132    String documentText = jCas.getDocumentText();
133
134    int start;
135    int end;
136
137    if (before) {
138      start = 0;
139      end = annotation.getBegin();
140      List<TOKEN_TYPE> anns = JCasUtil.selectPreceding(jCas, tokenClass, annotation, numberOfTokens);
141      if (anns.size() > 0) {
142        start = anns.get(0).getBegin();
143      }
144    } else {
145      start = annotation.getEnd();
146      end = documentText.length();
147      List<TOKEN_TYPE> anns = JCasUtil.selectFollowing(jCas, tokenClass, annotation, numberOfTokens);
148      if (anns.size() > 0) {
149        end = anns.get(anns.size() - 1).getEnd();
150      }
151    }
152
153    return documentText.substring(start, end);
154  }
155
156  public static <T extends Annotation> void sort(List<T> annotations) {
157
158    Collections.sort(annotations, new Comparator<T>() {
159
160      public int compare(T o1, T o2) {
161        if (o1.getBegin() != o2.getBegin())
162          return o1.getBegin() < o2.getBegin() ? -1 : 1;
163        else if (o1.getEnd() != o2.getEnd())
164          return o1.getEnd() < o2.getEnd() ? -1 : 1;
165        return 0;
166      }
167    });
168  }
169
170  public static int[] getAnnotationsExtent(List<? extends Annotation> annotations) {
171    int start = Integer.MAX_VALUE;
172    int end = 0;
173
174    for (Annotation annotation : annotations) {
175      if (annotation.getBegin() < start)
176        start = annotation.getBegin();
177      if (annotation.getEnd() > end)
178        end = annotation.getEnd();
179    }
180
181    return new int[] { start, end };
182  }
183}