package com.ibm.avatar.algebra.util.tokenize;

import com.ibm.avatar.algebra.util.lang.LangCode;
import java.util.TreeSet;

/* loaded from: input_file:com/ibm/avatar/algebra/util/tokenize/StandardTokenizer.class */
/**
 * Simple character-class based tokenizer.
 *
 * <p>Text is split into two kinds of tokens:
 * <ul>
 *   <li>maximal runs of characters for which {@link Character#isLetterOrDigit(char)} is true;</li>
 *   <li>single characters that are neither letters/digits nor whitespace (e.g. punctuation).</li>
 * </ul>
 * Characters for which {@link Character#isWhitespace(char)} is true never produce a token.
 *
 * <p>Classification is done per UTF-16 {@code char} through a precomputed lookup table, so
 * each half of a surrogate pair is classified on its own.
 */
public class StandardTokenizer extends Tokenizer {
    /** Bit set in a {@link #CHAR_CLASS_TAB} entry when the char is a letter or a digit. */
    private static final byte LETTER_OR_DIGIT_MASK = 1;

    /** Bit set in a {@link #CHAR_CLASS_TAB} entry when the char is whitespace. */
    private static final byte WHITESPACE_MASK = 2;

    /** Message used by the part-of-speech methods, which this tokenizer does not implement. */
    private static final String NO_POS_TAGGING_MESSAGE = "Standard tokenizer does not tag parts of speech";

    /** Character-class bit mask for every possible {@code char} value, indexed by the char. */
    private static final byte[] CHAR_CLASS_TAB = buildCharClassTab();

    /**
     * Builds the character-class lookup table.
     *
     * @return an array with one entry per {@code char} value (U+0000 through U+FFFF inclusive)
     */
    private static byte[] buildCharClassTab() {
        // One slot per char value, *including* Character.MAX_VALUE itself; a table of only
        // Character.MAX_VALUE entries would make a lookup of '\uFFFF' run off the end.
        byte[] table = new byte[Character.MAX_VALUE + 1];

        // The counter must be an int: a char counter would wrap to 0 and never reach table.length.
        for (int code = 0; code < table.length; code++) {
            char ch = (char) code;
            byte mask = 0;
            if (Character.isLetterOrDigit(ch)) {
                mask |= LETTER_OR_DIGIT_MASK;
            }
            if (Character.isWhitespace(ch)) {
                mask |= WHITESPACE_MASK;
            }
            table[code] = mask;
        }
        return table;
    }

    /**
     * @param mask an entry of {@link #CHAR_CLASS_TAB}
     * @return true if the mask marks a whitespace character
     */
    private static boolean isWhitespace(byte mask) {
        return (mask & WHITESPACE_MASK) != 0;
    }

    /**
     * @param mask an entry of {@link #CHAR_CLASS_TAB}
     * @return true if the mask marks a letter or a digit
     */
    private static boolean isLetterOrDigit(byte mask) {
        return (mask & LETTER_OR_DIGIT_MASK) != 0;
    }

    /**
     * Older tokenization routine that classifies characters by calling the {@link Character}
     * methods directly instead of using the lookup table.
     *
     * @param str text to tokenize
     * @param startOffset index in {@code str} at which scanning starts
     * @param maxTokens scanning stops once {@code output.numUsed} reaches this value
     *        (presumably the number of entries stored in {@code output} -- confirm against
     *        {@code BaseOffsetsList})
     * @param output receives the (begin, end) offsets of each token; it is reset first
     * @deprecated kept for reference; {@link #tokenizeStr(CharSequence, LangCode, BaseOffsetsList)}
     *             is the table-driven routine in this class.
     */
    @Deprecated
    protected void oldTokenizeStr(CharSequence str, int startOffset, int maxTokens, BaseOffsetsList output) {
        output.reset();
        final int length = str.length();
        int pos = startOffset;
        while (pos < length && output.numUsed < maxTokens) {
            final char current = str.charAt(pos);
            if (Character.isLetterOrDigit(current)) {
                // Consume the whole alphanumeric run as one token.
                final int tokenStart = pos;
                while (pos < length && Character.isLetterOrDigit(str.charAt(pos))) {
                    pos++;
                }
                output.addEntry(tokenStart, pos);
            } else if (Character.isWhitespace(current)) {
                // Whitespace only separates tokens.
                pos++;
            } else {
                // Any other character is a token by itself.
                output.addEntry(pos, pos + 1);
                pos++;
            }
        }
    }

    /**
     * Tokenizes the whole of {@code str}, writing token offsets to {@code output} in
     * left-to-right order.
     *
     * @param str text to tokenize
     * @param language language of the text; not consulted by this tokenizer
     * @param output receives the (begin, end) offsets of each token; it is reset first
     */
    @Override // com.ibm.avatar.algebra.util.tokenize.Tokenizer
    public void tokenizeStr(CharSequence str, LangCode language, BaseOffsetsList output) {
        output.reset();
        final int length = str.length();
        int pos = 0;
        while (pos < length) {
            final byte mask = CHAR_CLASS_TAB[str.charAt(pos)];
            if (isLetterOrDigit(mask)) {
                // Extend the token to the end of the alphanumeric run (or the end of the text).
                final int tokenStart = pos;
                do {
                    pos++;
                } while (pos < length && isLetterOrDigit(CHAR_CLASS_TAB[str.charAt(pos)]));
                output.addEntry(tokenStart, pos);
            } else {
                // Non-alphanumeric: whitespace is dropped, anything else is a one-char token.
                if (!isWhitespace(mask)) {
                    output.addEntry(pos, pos + 1);
                }
                pos++;
            }
        }
    }

    /**
     * @return always false; this tokenizer produces no part-of-speech information
     */
    @Override // com.ibm.avatar.algebra.util.tokenize.Tokenizer
    public boolean supportsPOSTagging() {
        return false;
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // com.ibm.avatar.algebra.util.tokenize.Tokenizer
    public TreeSet<Integer> decodePOSSpec(String str, LangCode langCode) {
        throw new UnsupportedOperationException(NO_POS_TAGGING_MESSAGE);
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // com.ibm.avatar.algebra.util.tokenize.Tokenizer
    public CharSequence posCodeToString(int i, LangCode langCode) {
        throw new UnsupportedOperationException(NO_POS_TAGGING_MESSAGE);
    }

    /**
     * @return always false
     */
    @Override // com.ibm.avatar.algebra.util.tokenize.Tokenizer
    public boolean supportLemmatization() {
        return false;
    }

    /**
     * @return the fixed name {@code "STANDARD"}
     */
    @Override // com.ibm.avatar.algebra.util.tokenize.Tokenizer
    public String getName() {
        return "STANDARD";
    }
}
