package com.ibm.avatar.algebra.util.sentence;

import com.ibm.avatar.algebra.function.scalar.GetCol;
import java.io.BufferedWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.regex.Matcher;

/* loaded from: input_file:com/ibm/avatar/algebra/util/sentence/SentenceChunker.class */
/**
 * Splits text into sentences using the regular expressions declared in
 * {@link SentenceConstants} plus an optional, caller-supplied set of abbreviations.
 *
 * <p>Each call stores the matcher for the text currently being scanned in an instance field,
 * so a single instance must not be used by several threads at the same time.
 */
public class SentenceChunker {
    /** Not read or written anywhere in this class; kept because it is part of the public surface. */
    public static BufferedWriter sentenceBufferedWriter = null;

    /** Matcher over the text currently being scanned; replaced on every boundary search. */
    private Matcher sentenceEndingMatcher = null;

    /** Words that, when they are the last word before a candidate boundary, veto that boundary. */
    private final HashSet<String> abbreviations = new HashSet<>();

    /** Creates a chunker with an empty abbreviation set. */
    public SentenceChunker() {
    }

    /**
     * Creates a chunker that treats the given words as abbreviations.
     *
     * @param strArr abbreviations to register; must not be {@code null}
     */
    public SentenceChunker(String[] strArr) {
        for (String abbreviation : strArr) {
            this.abbreviations.add(abbreviation);
        }
    }

    /**
     * Tells whether the text is split by a sentence boundary that is followed by more text.
     *
     * @param str text to inspect; must not be {@code null}
     * @return {@code true} if a boundary is found and at least one character follows it;
     *         {@code false} if no boundary is found or the boundary is the last character
     */
    public boolean containsSentenceBoundary(String str) {
        int boundary = findSentenceEnd(str);
        // The sentence ending at 'boundary' is never empty (boundary >= 0), so the answer
        // depends only on whether anything follows the boundary character.
        return boundary != -1 && boundary + 1 < str.length();
    }

    /**
     * Computes the sentence offsets of the text.
     *
     * <p>The returned list starts with {@code 0}; every following entry is the offset just past
     * the end of one sentence. If text remains after the last detected boundary, its end offset
     * (the length of the text) is appended as the final entry.
     *
     * @param str text to split; must not be {@code null}
     * @return the offsets in ascending order, trimmed to size
     */
    public ArrayList<Integer> getSentenceOffsetArrayList(String str) {
        ArrayList<Integer> offsets = new ArrayList<>();
        offsets.add(Integer.valueOf(0));

        int consumed = 0;          // offset of 'remaining' within the original text
        String remaining = str;
        int boundary;
        // Every iteration consumes at least one character, so the loop always terminates.
        while ((boundary = findSentenceEnd(remaining)) != -1) {
            consumed += boundary + 1;
            offsets.add(Integer.valueOf(consumed));
            remaining = remaining.substring(boundary + 1);
        }

        if (remaining.length() > 0) {
            offsets.add(Integer.valueOf(consumed + remaining.length()));
        }
        offsets.trimToSize();
        return offsets;
    }

    /**
     * Finds the index of the character that ends the first sentence of {@code text}.
     *
     * <p>Candidates are tried in order. A candidate is accepted when the text after it does not
     * begin with punctuation and the text up to it is a full sentence. When the candidates run
     * out, the last one examined is returned even though it was rejected, so callers still split
     * there.
     *
     * @param text text to scan
     * @return index of the boundary character, or {@code -1} if the text has no candidate
     */
    private int findSentenceEnd(String text) {
        setDocumentForObtainingBoundaries(text);
        int boundary = getNextCandidateBoundary();
        if (boundary == -1) {
            return -1;
        }
        while (!isAcceptableBoundary(text, boundary)) {
            int next = getNextCandidateBoundary();
            if (next == -1) {
                // No further candidates: stop instead of looping forever.
                break;
            }
            boundary = next;
        }
        return boundary;
    }

    /** Checks one candidate: the remainder must not start with punctuation and the head must be a full sentence. */
    private boolean isAcceptableBoundary(String text, int boundary) {
        String remainder = text.substring(boundary + 1);
        // The remainder is checked first so the sentence check is skipped when it fails.
        return doesNotBeginWithPunctuation(remainder)
                && isFullSentence(text.substring(0, boundary + 1));
    }

    /** Points the sentence-ending matcher at {@code str}, restarting the candidate search. */
    private void setDocumentForObtainingBoundaries(String str) {
        this.sentenceEndingMatcher = SentenceConstants.sentenceEndingPattern.matcher(str);
    }

    /**
     * Advances the matcher to the next candidate boundary.
     *
     * @return start index of the next match, or {@code -1} when there are no more matches
     */
    private int getNextCandidateBoundary() {
        if (this.sentenceEndingMatcher.find()) {
            return this.sentenceEndingMatcher.start();
        }
        return -1;
    }

    /** Returns {@code true} when {@code SentenceConstants.punctuationPattern} finds no match in {@code str}. */
    private boolean doesNotBeginWithPunctuation(String str) {
        return !SentenceConstants.punctuationPattern.matcher(str).find();
    }

    /**
     * Returns the first match of {@code SentenceConstants.lastWordPattern} in {@code str}.
     *
     * <p>NOTE(review): the fallback {@code GetCol.USAGE} looks like a decompiler constant
     * substitution (presumably an empty string literal in the original source) - confirm.
     */
    private String getLastWord(String str) {
        Matcher matcher = SentenceConstants.lastWordPattern.matcher(str);
        return matcher.find() ? matcher.group() : GetCol.USAGE;
    }

    /**
     * Decides whether {@code str} ends a sentence rather than an abbreviation.
     *
     * @return {@code true} if the valid-boundary pattern matches; otherwise {@code false} if the
     *         abbreviation pattern matches or the last word is a registered abbreviation;
     *         {@code true} in all remaining cases
     */
    private boolean isFullSentence(String str) {
        if (SentenceConstants.validSentenceBoundaryPattern.matcher(str).find()) {
            return true;
        }
        if (SentenceConstants.abbrevPattern.matcher(str).find()) {
            return false;
        }
        return !this.abbreviations.contains(getLastWord(str));
    }
}
