/*
 * Decompiled with CFR 0.152.
 */
package edu.washington.cs.knowitall.nlp;

import edu.washington.cs.knowitall.commonlib.Range;
import edu.washington.cs.knowitall.nlp.ChunkedSentence;
import edu.washington.cs.knowitall.nlp.ChunkerException;
import edu.washington.cs.knowitall.nlp.OpenNlpUtils;
import edu.washington.cs.knowitall.nlp.SentenceChunker;
import edu.washington.cs.knowitall.util.DefaultObjects;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Pattern;
import opennlp.tools.chunker.Chunker;
import opennlp.tools.postag.POSTagger;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Span;

/**
 * A {@link SentenceChunker} that tokenizes, part-of-speech tags, and chunks a sentence using
 * an OpenNLP {@link Tokenizer}, {@link POSTagger}, and {@link Chunker}.
 *
 * <p>After chunking, the chunk tags are optionally passed through
 * {@code OpenNlpUtils.attachOfs} and {@code OpenNlpUtils.attachPossessives}. Both
 * post-processing steps are enabled by default and can be switched with
 * {@link #attachOfs(boolean)} and {@link #attachPossessives(boolean)}.
 */
public class OpenNlpSentenceChunker
implements SentenceChunker {
    private final Tokenizer tokenizer;
    private final POSTagger posTagger;
    private final Chunker chunker;

    /** Whether {@code OpenNlpUtils.attachOfs} is applied to the chunk tags. */
    private boolean attachOfs = true;

    /** Whether {@code OpenNlpUtils.attachPossessives} is applied to the chunk tags. */
    private boolean attachPossessives = true;

    /** Matches the character U+00A0; every match is replaced by a plain space before tokenizing. */
    Pattern convertToSpace = Pattern.compile("\\xa0");

    /**
     * Creates a chunker backed by the default tokenizer, POS tagger, and chunker supplied by
     * {@link DefaultObjects}.
     *
     * @throws IOException declared by this constructor; presumably raised while the default
     *     models are loaded (the loading code is not visible here)
     */
    public OpenNlpSentenceChunker() throws IOException {
        this(
            DefaultObjects.getDefaultTokenizer(),
            DefaultObjects.getDefaultPosTagger(),
            DefaultObjects.getDefaultChunker());
    }

    /**
     * Creates a chunker backed by the given OpenNLP components.
     *
     * @param tokenizer produces the token spans of a sentence
     * @param posTagger assigns a part-of-speech tag to each token
     * @param chunker assigns a chunk tag to each token given its part-of-speech tag
     */
    public OpenNlpSentenceChunker(Tokenizer tokenizer, POSTagger posTagger, Chunker chunker) {
        this.tokenizer = tokenizer;
        this.posTagger = posTagger;
        this.chunker = chunker;
    }

    /**
     * @return {@code true} if {@code OpenNlpUtils.attachOfs} is applied during chunking
     */
    public boolean attachOfs() {
        return this.attachOfs;
    }

    /**
     * @return {@code true} if {@code OpenNlpUtils.attachPossessives} is applied during chunking
     */
    public boolean attachPossessives() {
        return this.attachPossessives;
    }

    /**
     * Enables or disables the {@code OpenNlpUtils.attachOfs} post-processing step.
     *
     * @param attachOfs the new setting
     */
    public void attachOfs(boolean attachOfs) {
        this.attachOfs = attachOfs;
    }

    /**
     * Enables or disables the {@code OpenNlpUtils.attachPossessives} post-processing step.
     *
     * @param attachPossessives the new setting
     */
    public void attachPossessives(boolean attachPossessives) {
        this.attachPossessives = attachPossessives;
    }

    /**
     * Tokenizes, tags, and chunks a single sentence.
     *
     * <p>Every U+00A0 character in the input is replaced by a space first; token ranges and
     * token text are taken from that normalized string.
     *
     * @param sent the sentence text
     * @return the sentence's token ranges, tokens, part-of-speech tags, and chunk tags
     * @throws ChunkerException if a {@link NullPointerException} is raised while tokenizing,
     *     extracting tokens, tagging, or chunking
     */
    @Override
    public ChunkedSentence chunkSentence(String sent) throws ChunkerException {
        String normalized = this.convertToSpace.matcher(sent).replaceAll(" ");

        Range[] ranges;
        String[] tokens;
        String[] posTags;
        String[] npChunkTags;
        try {
            Span[] spans = this.tokenizer.tokenizePos(normalized);
            int count = spans.length;
            ranges = new Range[count];
            tokens = new String[count];
            for (int i = 0; i < count; i++) {
                Span span = spans[i];
                ranges[i] = Range.fromInterval(span.getStart(), span.getEnd());
                tokens[i] = normalized.substring(span.getStart(), span.getEnd());
            }
            posTags = this.posTagger.tag(tokens);
            npChunkTags = this.chunker.chunk(tokens, posTags);
        }
        catch (NullPointerException e) {
            // The NPE is wrapped so callers only have to deal with ChunkerException.
            throw new ChunkerException("OpenNLP threw NPE on '" + normalized + "'", e);
        }

        // Optional post-processing of the chunk tags, applied outside the NPE guard.
        if (this.attachOfs) {
            OpenNlpUtils.attachOfs(tokens, npChunkTags);
        }
        if (this.attachPossessives) {
            OpenNlpUtils.attachPossessives(posTags, npChunkTags);
        }
        return new ChunkedSentence(ranges, tokens, posTags, npChunkTags);
    }
}

