package com.ibm.avatar.algebra.extract;

import com.ibm.avatar.algebra.base.ExtractionOp;
import com.ibm.avatar.algebra.base.MemoizationTable;
import com.ibm.avatar.algebra.base.Operator;
import com.ibm.avatar.algebra.datamodel.Span;
import com.ibm.avatar.algebra.datamodel.Tuple;
import com.ibm.avatar.algebra.util.lang.LangCode;
import com.ibm.avatar.algebra.util.tokenize.OffsetsList;
import com.ibm.avatar.algebra.util.tokenize.StandardTokenizer;
import com.ibm.avatar.algebra.util.tokenize.Tokenizer;
import com.ibm.avatar.api.exceptions.TextAnalyticsException;
import java.util.HashSet;

/* loaded from: input_file:com/ibm/avatar/algebra/extract/PartOfSpeech.class */
/**
 * Extraction operator that emits a result annotation for each token of the
 * input span whose code, as reported by {@code OffsetsList.index(int)}, is one
 * of the codes decoded from the part-of-speech specification string.
 *
 * <p>Spans whose language differs from the configured language produce no
 * results. The tokenizer obtained from the {@link MemoizationTable} must
 * support part-of-speech tagging; otherwise state initialization fails.
 *
 * <p>NOTE(review): {@code OffsetsList.index(i)} is presumably the
 * part-of-speech tag code of token {@code i} - confirm against the tokenizer
 * implementation.
 */
public class PartOfSpeech extends ExtractionOp {
    /** Language that an input span must report for it to be processed. */
    private final LangCode language;

    /** Raw part-of-speech specification; decoded by the tokenizer in {@link #initStateInternal}. */
    private final String posStr;

    /** Codes decoded from {@link #posStr}; empty until {@link #initStateInternal} has run. */
    private final HashSet<Integer> posCodes = new HashSet<>();

    /**
     * Creates the operator.
     *
     * @param str forwarded unchanged to the {@link ExtractionOp} constructor as its first argument
     *        (presumably the input column name - TODO confirm)
     * @param str2 language string, converted with {@link LangCode#strToLangCode}
     * @param str3 part-of-speech specification string, decoded later by the tokenizer
     * @param str4 forwarded unchanged to the {@link ExtractionOp} constructor as its second argument
     *        (presumably the output column name - TODO confirm)
     * @param operator forwarded unchanged to the {@link ExtractionOp} constructor
     */
    public PartOfSpeech(String str, String str2, String str3, String str4, Operator operator) {
        super(str, str4, operator);
        this.posStr = str3;
        this.language = LangCode.strToLangCode(str2);
    }

    /**
     * Initializes the superclass state, verifies that the active tokenizer can
     * produce part-of-speech tags, and decodes the specification string into
     * the set of accepted codes.
     *
     * @param memoizationTable table supplying the tokenizer
     * @throws TextAnalyticsException declared by the overridden method
     * @throws RuntimeException if the tokenizer does not support part-of-speech tagging
     */
    @Override
    public void initStateInternal(MemoizationTable memoizationTable) throws TextAnalyticsException {
        super.initStateInternal(memoizationTable);

        Tokenizer activeTokenizer = memoizationTable.getTokenizer();
        if (!activeTokenizer.supportsPOSTagging()) {
            // Name the offending tokenizer in the message: a fixed label for the
            // standard tokenizer, otherwise the concrete class name.
            String tokenizerLabel;
            if (activeTokenizer instanceof StandardTokenizer) {
                tokenizerLabel = "Standard tokenizer";
            } else {
                tokenizerLabel = String.format("current %s tokenizer configuration", activeTokenizer.getClass().getName());
            }
            String message = String.format("The %s does not support part of speech tagging. Use the Multilingual tokenizer and part of speech tagger, or another compatible tokenizer that supports part of speech tagging instead.", tokenizerLabel);
            throw new RuntimeException(message);
        }

        this.posCodes.addAll(activeTokenizer.decodePOSSpec(this.posStr, this.language));
    }

    /**
     * Tokenizes the span and adds one result annotation per token whose code is
     * in the accepted set. Does nothing when the span's language is not the
     * configured one.
     *
     * @param memoizationTable table supplying the tokenizer and profiling hooks
     * @param tuple tuple passed through to {@code addResultAnnot}
     * @param span span to tokenize
     * @throws Exception declared by the overridden method
     */
    @Override
    protected void extract(MemoizationTable memoizationTable, Tuple tuple, Span span) throws Exception {
        if (!this.language.equals(span.getLanguage())) {
            return;
        }

        // Only the tokenize call is bracketed by the profiling record.
        memoizationTable.profileEnter(this.tokRecord);
        OffsetsList tokens = memoizationTable.getTokenizer().tokenize(span);
        memoizationTable.profileLeave(this.tokRecord);

        for (int tokIx = 0; tokIx < tokens.size(); tokIx++) {
            Integer code = Integer.valueOf(tokens.index(tokIx));
            if (!this.posCodes.contains(code)) {
                continue;
            }
            addResultAnnot(tuple, tokens.begin(tokIx), tokens.end(tokIx), span, memoizationTable);
        }
    }

    /**
     * Reports that this operator needs part-of-speech information.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean requiresPartOfSpeechInternal() {
        return true;
    }
}
