/*
 * Decompiled with CFR 0.152.
 */
package edu.nyu.jet.zoner;

import edu.nyu.jet.aceJet.Ace;
import edu.nyu.jet.tipster.Annotation;
import edu.nyu.jet.tipster.Document;
import edu.nyu.jet.tipster.Span;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Vector;

/**
 * Splits a span of document text into sentences and records each one as a
 * {@code sentence} annotation on the document.
 *
 * <p>The text is scanned as whitespace-delimited tokens. A sentence break is
 * placed between two consecutive tokens when a dividing annotation boundary
 * lies between them, when the pair looks like a sentence end (see
 * {@link #isSentenceEnd}), or when the first token closes a dateline (see
 * {@link #isDatelineEnd}).
 */
public class SentenceSplitter {
    /** Tokens ending in a period that do not end a sentence (exact-case match). */
    static HashSet<String> abbreviations = new HashSet<String>();

    /** Lower-cased copy of {@link #abbreviations}, consulted when {@code Ace.monocase} is set. */
    static HashSet<String> monocaseAbbreviations = new HashSet<String>();

    /** Annotation types whose start and end offsets always force a sentence break. */
    static final String[] dividingAnnotations = new String[]{
        "POST", "POSTER", "POSTDATE", "SUBJECT", "SPEAKER", "TURN", "P", "dateline", "textBreak"
    };

    /** Characters that may precede the first real character of a token (quotes, brackets). */
    private static final String OPENING_PUNCTUATION = "`'\"([{<";

    /** Seed list for {@link #abbreviations}. */
    private static final String[] ABBREVIATION_LIST = {
        "Adm.", "Brig.", "Capt.", "Cmdr.", "Col.", "Dr.", "Gen.", "Gov.", "Lt.", "Maj.",
        "Messrs.", "Mr.", "Mrs.", "Ms.", "Prof.", "Rep.", "Reps.", "Rev.", "Sen.", "Sens.",
        "Sgt.", "Sr.", "St.", "Alex.", "Benj.", "Chas.", "a.k.a.", "c.f.", "i.e.", "vs.",
        "v.", "e.g.", "U.S.", "U.N.", "D.C."
    };

    static {
        for (String abbreviation : ABBREVIATION_LIST) {
            abbreviations.add(abbreviation);
            // Locale.ROOT keeps the table independent of the JVM default locale;
            // isAbbreviation lower-cases its lookup key the same way.
            monocaseAbbreviations.add(abbreviation.toLowerCase(Locale.ROOT));
        }
    }

    /**
     * Adds a {@code sentence} annotation to {@code doc} for every sentence found
     * in {@code textSpan}.
     *
     * <p>Leading whitespace of the span is skipped. Each sentence span starts at
     * the first character of its first token and runs up to the first character
     * of the next sentence (so it includes trailing whitespace); the last
     * sentence runs to the end of {@code textSpan}. Nothing is annotated when the
     * span contains only whitespace.
     *
     * @param doc      the document to read text from and annotate
     * @param textSpan the portion of the document to split
     */
    public static void split(Document doc, Span textSpan) {
        int start = textSpan.start();
        int end = textSpan.end();
        // Boundaries are collected over the full span, before leading whitespace is trimmed.
        HashSet<Integer> boundaries = annotationBoundaries(doc, start, end);
        String text = doc.text();
        while (start < end && Character.isWhitespace(text.charAt(start))) {
            ++start;
        }
        if (start >= end) {
            return;
        }

        int posn = start;
        int tokenCount = 0;
        int sentenceStart = start;
        String currentToken = null;
        boolean boundaryAfterCurrentToken = false;
        boolean startOfSentence = true;

        while (posn < end) {
            // Scan the next whitespace-delimited token.
            int nextTokenStart = posn;
            while (posn < end && !Character.isWhitespace(text.charAt(posn))) {
                ++posn;
            }
            String nextToken = text.substring(nextTokenStart, posn);
            ++tokenCount;

            // A dividing-annotation offset directly after the token, or anywhere in
            // the whitespace that follows it, forces a break after this token.
            boolean boundaryAfterNextToken = boundaries.contains(Integer.valueOf(posn));
            while (posn < end && Character.isWhitespace(text.charAt(posn))) {
                ++posn;
                if (boundaries.contains(Integer.valueOf(posn))) {
                    boundaryAfterNextToken = true;
                }
            }

            // Decide whether a sentence ends between currentToken and nextToken.
            if (boundaryAfterCurrentToken
                    || isSentenceEnd(currentToken, nextToken, startOfSentence)
                    || isDatelineEnd(currentToken, tokenCount)) {
                doc.annotate("sentence", new Span(sentenceStart, nextTokenStart), null);
                sentenceStart = nextTokenStart;
                startOfSentence = true;
            } else {
                startOfSentence = false;
            }
            currentToken = nextToken;
            boundaryAfterCurrentToken = boundaryAfterNextToken;
        }

        // Close the final sentence at the end of the span.
        if (sentenceStart != end) {
            doc.annotate("sentence", new Span(sentenceStart, end), null);
        }
    }

    /**
     * Collects the start and end offsets of every {@link #dividingAnnotations}
     * annotation that fall within {@code [start, end]} (both ends inclusive).
     *
     * @param doc   the document whose annotations are inspected
     * @param start lowest offset of interest
     * @param end   highest offset of interest
     * @return the set of offsets at which a sentence break is forced
     */
    private static HashSet<Integer> annotationBoundaries(Document doc, int start, int end) {
        HashSet<Integer> boundaries = new HashSet<Integer>();
        for (String type : dividingAnnotations) {
            Vector<Annotation> annotations = doc.annotationsOfType(type);
            if (annotations == null) {
                continue;
            }
            for (Annotation ann : annotations) {
                Span span = ann.span();
                addIfWithin(boundaries, span.start(), start, end);
                addIfWithin(boundaries, span.end(), start, end);
            }
        }
        return boundaries;
    }

    /** Adds {@code offset} to {@code boundaries} when {@code start <= offset <= end}. */
    private static void addIfWithin(HashSet<Integer> boundaries, int offset, int start, int end) {
        if (offset >= start && offset <= end) {
            boundaries.add(Integer.valueOf(offset));
        }
    }

    /**
     * Decides whether a sentence ends between {@code currentToken} and
     * {@code nextToken}.
     *
     * @param currentToken    the token that would end the sentence; may be
     *                        {@code null} before the first token has been read
     * @param nextToken       the token that would begin the next sentence
     * @param startOfSentence whether {@code currentToken} is the first token of
     *                        its sentence
     * @return {@code true} if a sentence break belongs between the two tokens
     */
    private static boolean isSentenceEnd(String currentToken, String nextToken, boolean startOfSentence) {
        // Empty or missing tokens carry no evidence of a sentence end.
        if (currentToken == null || currentToken.length() == 0
                || nextToken == null || nextToken.length() == 0) {
            return false;
        }
        int cTL = currentToken.length();

        // Known abbreviations never end a sentence, even behind opening punctuation.
        if (isAbbreviation(currentToken)) {
            return false;
        }
        if (cTL > 1 && in(currentToken.charAt(0), OPENING_PUNCTUATION)
                && isAbbreviation(currentToken.substring(1))) {
            return false;
        }
        if (cTL > 2 && startsWithDoubledQuote(currentToken)
                && isAbbreviation(currentToken.substring(2))) {
            return false;
        }

        // The following token must be able to open a sentence.
        if (!canStartSentence(nextToken)) {
            return false;
        }

        // Last three characters of the current token; ' ' stands in for "no such character".
        char last = currentToken.charAt(cTL - 1);
        char secondLast = cTL > 1 ? currentToken.charAt(cTL - 2) : ' ';
        char thirdLast = cTL > 2 ? currentToken.charAt(cTL - 3) : ' ';

        // '?' or '!', or terminal punctuation followed by a closing quote/bracket
        // (or by a doubled apostrophe), ends the sentence.
        if (last == '?' || last == '!'
                || (in(secondLast, "?!.") && in(last, "\"'}>)"))
                || (in(thirdLast, "?!.") && secondLast == '\'' && last == '\'')) {
            return true;
        }
        if (last != '.') {
            return false;
        }

        // "Q." / "A." standing alone at the start of a sentence is a complete sentence.
        if (startOfSentence
                && (currentToken.equalsIgnoreCase("Q.") || currentToken.equalsIgnoreCase("A."))) {
            return true;
        }

        char firstOfCurrent = currentToken.charAt(0);
        // A single initial such as "J." does not end a sentence.
        if (cTL == 2 && isInitialLetter(secondLast)) {
            return false;
        }
        // Neither does a pair of initials such as "J.R.".
        if (cTL == 4 && thirdLast == '.'
                && isInitialLetter(secondLast) && isInitialLetter(firstOfCurrent)) {
            return false;
        }
        // U.S., U.N. and D.C. are matched case-insensitively only in monocase text.
        if (isCountryOrDistrictAbbreviation(currentToken)) {
            return false;
        }

        char firstOfNext = nextToken.charAt(0);
        if (firstOfNext == '<') {
            return true;
        }
        // A fully parenthesized next token, e.g. "(NYSE)", is taken as a
        // continuation of the current sentence rather than a new one.
        boolean parenthesized = firstOfNext == '('
                && (nextToken.endsWith(")") || nextToken.endsWith(").") || nextToken.endsWith("),"));
        return !parenthesized;
    }

    /**
     * Returns whether {@code nextToken} (non-empty) may begin a sentence: it is
     * capitalized (optionally after one opening punctuation mark or a doubled
     * quote), or the text is monocase, or the token is {@code "_"}, or it starts
     * with {@code '<'}.
     */
    private static boolean canStartSentence(String nextToken) {
        int nTL = nextToken.length();
        char first = nextToken.charAt(0);
        char second = nTL > 1 ? nextToken.charAt(1) : ' ';
        char third = nTL > 2 ? nextToken.charAt(2) : ' ';
        return Character.isUpperCase(first)
                || Ace.monocase
                || (Character.isUpperCase(second) && in(first, OPENING_PUNCTUATION))
                || (Character.isUpperCase(third) && startsWithDoubledQuote(nextToken))
                || nextToken.equals("_")
                || first == '<';
    }

    /** Returns whether {@code token} begins with {@code ''} or {@code ``}. */
    private static boolean startsWithDoubledQuote(String token) {
        return token.startsWith("''") || token.startsWith("``");
    }

    /**
     * Returns whether {@code c} can be the letter of an initial: any letter in
     * monocase text, otherwise an upper-case character.
     */
    private static boolean isInitialLetter(char c) {
        return Ace.monocase ? Character.isLetter(c) : Character.isUpperCase(c);
    }

    /**
     * Returns whether {@code token} is "U.S.", "U.N." or "D.C.", ignoring case
     * when {@code Ace.monocase} is set and matching exactly otherwise.
     */
    private static boolean isCountryOrDistrictAbbreviation(String token) {
        if (Ace.monocase) {
            return token.equalsIgnoreCase("U.S.")
                    || token.equalsIgnoreCase("U.N.")
                    || token.equalsIgnoreCase("D.C.");
        }
        return token.equals("U.S.") || token.equals("U.N.") || token.equals("D.C.");
    }

    /** Returns whether character {@code c} occurs in {@code s}. */
    private static boolean in(char c, String s) {
        return s.indexOf(c) >= 0;
    }

    /**
     * Returns whether the text covered by {@code currentToken} is a lone double
     * or single quote character. Not referenced elsewhere in this class.
     *
     * @param currentToken annotation covering the token; may be {@code null}
     * @param doc          document supplying the token text
     */
    private static boolean forcesCap(Annotation currentToken, Document doc) {
        if (currentToken == null) {
            return false;
        }
        String word = doc.text(currentToken).trim();
        return word.equals("\"") || word.equals("'");
    }

    /**
     * Returns whether {@code token} is a listed abbreviation, compared
     * case-insensitively when {@code Ace.monocase} is set and exactly otherwise.
     */
    private static boolean isAbbreviation(String token) {
        if (Ace.monocase) {
            return monocaseAbbreviations.contains(token.toLowerCase(Locale.ROOT));
        }
        return abbreviations.contains(token);
    }

    /**
     * Returns whether {@code currentToken} is the {@code "_"} separator and
     * {@code tokenCount} is at most 5, i.e. the separator appears among the
     * first few tokens scanned.
     */
    private static boolean isDatelineEnd(String currentToken, int tokenCount) {
        return currentToken != null && currentToken.equals("_") && tokenCount <= 5;
    }
}

