001/** 
002 * Copyright (c) 2011, Regents of the University of Colorado 
003 * All rights reserved.
004 * 
005 * Redistribution and use in source and binary forms, with or without
006 * modification, are permitted provided that the following conditions are met:
007 * 
008 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
009 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
010 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
011 * 
012 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
013 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
014 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
015 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
016 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
017 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
018 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
019 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
020 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
021 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
022 * POSSIBILITY OF SUCH DAMAGE. 
023 */
024
025package org.cleartk.opennlp.tools.parser;
026
027import java.util.ArrayList;
028import java.util.List;
029
030import opennlp.tools.parser.AbstractBottomUpParser;
031import opennlp.tools.parser.Parse;
032
033import org.apache.uima.jcas.JCas;
034import org.apache.uima.jcas.cas.FSArray;
035import org.apache.uima.jcas.tcas.Annotation;
036import org.cleartk.syntax.constituent.type.TerminalTreebankNode;
037import org.cleartk.syntax.constituent.type.TopTreebankNode;
038import org.cleartk.syntax.constituent.type.TreebankNode;
039import org.apache.uima.fit.util.FSCollectionFactory;
040
041import com.google.common.annotations.Beta;
042
043/**
044 * <br>
045 * Copyright (c) 2011, Regents of the University of Colorado <br>
046 * All rights reserved.
047 * <p>
048 * 
049 * @author Philip Ogren
050 */
051@Beta
052public class DefaultOutputTypesHelper<TOKEN_TYPE extends Annotation, SENTENCE_TYPE extends Annotation>
053    implements OutputTypesHelper<TOKEN_TYPE, SENTENCE_TYPE, Parse, TreebankNode> {
054
055  @Override
056  public TreebankNode addParse(
057      JCas jCas,
058      Parse parse,
059      SENTENCE_TYPE sentence,
060      List<TOKEN_TYPE> tokens) {
061
062    TopTreebankNode node = new TopTreebankNode(jCas);
063    this.setAttributes(node, parse, null, jCas);
064    node.addToIndexes();
065
066    StringBuffer sb = new StringBuffer();
067    parse.show(sb);
068    node.setTreebankParse(sb.toString());
069    List<TreebankNode> terminals = getTerminals(node);
070    node.setTerminals(new FSArray(jCas, terminals.size()));
071    FSCollectionFactory.fillArrayFS(node.getTerminals(), terminals);
072    return node;
073  }
074
075  private void setAttributes(TreebankNode node, Parse parse, TreebankNode parent, JCas jCas) {
076    node.setParent(parent);
077    node.setNodeType(parse.getType());
078    node.setBegin(parse.getSpan().getStart());
079    node.setEnd(parse.getSpan().getEnd());
080
081    // leaf node
082    if (isLeaf(parse)) {
083      node.setLeaf(true);
084      node.setNodeValue(parse.getChildren()[0].toString());
085      node.setChildren(new FSArray(jCas, 0));
086    }
087
088    // branch node
089    else {
090      node.setLeaf(false);
091      node.setNodeValue(null);
092      List<TreebankNode> childNodes = new ArrayList<TreebankNode>();
093      for (Parse childParse : parse.getChildren()) {
094        TreebankNode childNode = isLeaf(childParse)
095            ? new TerminalTreebankNode(jCas)
096            : new TreebankNode(jCas);
097        this.setAttributes(childNode, childParse, node, jCas);
098        childNode.addToIndexes();
099        childNodes.add(childNode);
100      }
101      node.setChildren(new FSArray(jCas, childNodes.size()));
102      FSCollectionFactory.fillArrayFS(node.getChildren(), childNodes);
103    }
104  }
105
106  protected List<TreebankNode> getTerminals(TreebankNode node) {
107    List<TreebankNode> tList = new ArrayList<TreebankNode>();
108    int nChildren = node.getChildren().size();
109    if (nChildren == 0) {
110      tList.add(node);
111    }
112    for (int i = 0; i < nChildren; ++i) {
113      tList.addAll(getTerminals(node.getChildren(i)));
114    }
115    return tList;
116  }
117
118  private boolean isLeaf(Parse parse) {
119    Parse[] childParses = parse.getChildren();
120    return childParses.length == 1 && childParses[0].getType() == AbstractBottomUpParser.TOK_NODE;
121  }
122}