package org.deeplearning4j.spark.models.paragraphvectors.functions;

import lombok.NonNull;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;
import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration;
import org.deeplearning4j.models.sequencevectors.sequence.Sequence;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.spark.models.sequencevectors.functions.BaseTokenizerFunction;
import org.deeplearning4j.text.documentiterator.LabelledDocument;

/* loaded from: input_file:org/deeplearning4j/spark/models/paragraphvectors/functions/DocumentSequenceConvertFunction.class */
/**
 * Spark {@link Function} that converts a {@link LabelledDocument} into a
 * {@link Sequence} of {@link VocabWord} elements.
 *
 * <p>Elements are taken from the document's referenced content when that is
 * present and non-empty; otherwise the raw content is tokenized with the
 * tokenizer factory inherited from {@link BaseTokenizerFunction}. Every
 * non-empty document label is attached to the sequence as a label element.
 */
public class DocumentSequenceConvertFunction extends BaseTokenizerFunction implements Function<LabelledDocument, Sequence<VocabWord>> {
    /**
     * Creates the function and forwards the broadcast configuration to the base class.
     *
     * @param broadcast broadcast configuration; must not be {@code null}
     * @throws NullPointerException if {@code broadcast} is {@code null}
     */
    public DocumentSequenceConvertFunction(@NonNull Broadcast<VectorsConfiguration> broadcast) {
        super(broadcast);
        // The super call has to come first, so the base constructor sees the
        // argument before this guard runs.
        if (broadcast == null) {
            throw new NullPointerException("configurationBroadcast is marked non-null but is null");
        }
    }

    /**
     * Builds the sequence for a single document.
     *
     * @param labelledDocument document to convert
     * @return a new sequence holding the document's elements and its labels
     * @throws Exception declared by the Spark {@link Function} contract
     */
    public Sequence<VocabWord> call(LabelledDocument labelledDocument) throws Exception {
        final Sequence<VocabWord> result = new Sequence<>();

        // Step 1: elements. Pre-built referenced content wins over tokenization.
        final boolean hasReferencedContent = labelledDocument.getReferencedContent() != null
                && !labelledDocument.getReferencedContent().isEmpty();
        if (hasReferencedContent) {
            result.addElements(labelledDocument.getReferencedContent());
        } else {
            // The tokenizer factory is created on first use.
            if (this.tokenizerFactory == null) {
                instantiateTokenizerFactory();
            }
            for (String token : this.tokenizerFactory.create(labelledDocument.getContent()).getTokens()) {
                if (token == null || token.isEmpty()) {
                    continue;
                }
                result.addElement(new VocabWord(1.0d, token));
            }
        }

        // Step 2: labels. Null or empty labels are skipped.
        for (String label : labelledDocument.getLabels()) {
            if (label == null || label.isEmpty()) {
                continue;
            }
            final VocabWord labelWord = new VocabWord(1.0d, label);
            labelWord.markAsLabel(true);
            result.addSequenceLabel(labelWord);
        }

        return result;
    }
}
