/*
 * Decompiled with CFR 0.152.
 */
package chalk.tools.formats;

import chalk.tools.doccat.DocumentSample;
import chalk.tools.tokenize.SimpleTokenizer;
import chalk.tools.util.FilterObjectStream;
import chalk.tools.util.PlainTextByLineStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;

/**
 * Turns a line-oriented input into {@link DocumentSample}s for document
 * categorization by grouping a fixed number of consecutive lines into one
 * sample, all labelled with the same language.
 *
 * <p>Each line is tokenized with {@link SimpleTokenizer}; the first token of
 * every line is dropped (presumably the sentence number of the Leipzig
 * corpus format -- confirm against the data) and the remaining tokens are
 * joined with single spaces.
 */
public class LeipzigDoccatSampleStream extends FilterObjectStream<String, DocumentSample> {
    /** Category assigned to every sample produced by this stream. */
    private final String language;
    /** Maximum number of input lines merged into one sample. */
    private final int sentencesPerDocument;

    /**
     * Creates a sample stream reading UTF-8 encoded lines from the given input.
     *
     * <p>Side effect: {@code System.out} is replaced by an auto-flushing
     * UTF-8 {@link PrintStream} wrapping the current {@code System.out}.
     *
     * @param language the category used for every produced sample
     * @param sentencesPerDocument how many lines are merged into one sample;
     *        for a value of zero or less {@link #read()} always returns {@code null}
     * @param inputStream the raw input, decoded as UTF-8
     * @throws IOException if the underlying line stream or the UTF-8
     *         {@code PrintStream} cannot be created
     */
    LeipzigDoccatSampleStream(String language, int sentencesPerDocument, InputStream inputStream)
            throws IOException {
        super(new PlainTextByLineStream(inputStream, "UTF-8"));
        // Kept for backward compatibility: existing callers may rely on
        // System.out emitting UTF-8 after constructing this stream.
        System.setOut(new PrintStream(System.out, true, "UTF-8"));
        this.language = language;
        this.sentencesPerDocument = sentencesPerDocument;
    }

    /**
     * Reads up to {@code sentencesPerDocument} lines and merges them into one sample.
     *
     * <p>If a complete batch of lines contributes no text at all (every line
     * consisted of its leading token only), the batch is discarded and reading
     * continues, so that {@code null} is returned only when the underlying
     * stream is exhausted and not while input is still pending.
     *
     * @return the next sample, or {@code null} if no further text is available
     * @throws IOException if reading fails or a line yields no tokens
     */
    @Override
    public DocumentSample read() throws IOException {
        StringBuilder text = new StringBuilder();
        int linesInBatch = 0;
        String line;

        while (linesInBatch < this.sentencesPerDocument && (line = this.samples.read()) != null) {
            String[] tokens = SimpleTokenizer.INSTANCE.tokenize(line);
            if (tokens.length == 0) {
                throw new IOException("Empty lines are not allowed!");
            }

            // Skip the first token of the line; only the rest goes into the sample text.
            for (int i = 1; i < tokens.length; ++i) {
                text.append(tokens[i]).append(' ');
            }
            ++linesInBatch;

            // A full batch without any text must not be reported as end of
            // stream: start a new batch and keep consuming input instead.
            if (linesInBatch == this.sentencesPerDocument && text.length() == 0) {
                linesInBatch = 0;
            }
        }

        if (text.length() == 0) {
            return null;
        }
        return new DocumentSample(this.language, text.toString());
    }
}

