package com.ibm.avatar.algebra.util.document;

import com.ibm.avatar.algebra.datamodel.FieldGetter;
import com.ibm.avatar.algebra.datamodel.Span;
import com.ibm.avatar.algebra.datamodel.Tuple;
import com.ibm.avatar.api.Constants;
import com.ibm.avatar.api.DocReader;
import java.io.File;
import java.io.FileOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

/* loaded from: input_file:com/ibm/avatar/algebra/util/document/PrefixDocs.class */
/**
 * Command-line utility that copies a prefix of a document collection into a zip archive.
 *
 * <p>Documents are read from the input with a {@link DocReader}. For each of the first
 * {@code ndoc} documents, one zip entry is written: the entry name is the text of the document's
 * label column ({@link Constants#LABEL_COL_NAME}) and the entry content is the text of the
 * document's {@code "text"} column encoded as UTF-8.
 */
public class PrefixDocs {
    /** Usage message template; the single {@code %s} is filled with this class's name. */
    public static final String USAGE = "java %s\n    [input] [output.zip] [ndoc]\n";

    /** Number of documents processed between two progress reports on stderr. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Program entry point.
     *
     * @param strArr exactly three arguments: the input to read documents from, the zip file to
     *     create, and the maximum number of documents to copy; with any other argument count the
     *     usage message is printed to stderr and nothing is written
     * @throws Exception if the document count is not a valid integer, or if reading the input or
     *     writing the archive fails
     */
    public static void main(String[] strArr) throws Exception {
        if (3 != strArr.length) {
            System.err.printf(USAGE, PrefixDocs.class.getName());
            return;
        }
        File inputFile = new File(strArr[0]);
        File outputFile = new File(strArr[1]);
        int maxDocs = Integer.parseInt(strArr[2]);

        // NOTE(review): the reader is never released here; confirm whether DocReader exposes a
        // cleanup method that should be called once the loop is done.
        DocReader docReader = new DocReader(inputFile);
        FieldGetter<Span> textAcc = docReader.getDocSchema().spanAcc("text");
        FieldGetter<Span> labelAcc = docReader.getDocSchema().spanAcc(Constants.LABEL_COL_NAME);

        long startMs = System.currentTimeMillis();
        long totalChars = 0;
        int numDocs = 0;

        // try-with-resources guarantees the archive is finished and the file handle released
        // even when reading a document or writing an entry fails part-way through.
        try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(outputFile))) {
            while (docReader.hasNext() && numDocs < maxDocs) {
                Tuple doc = docReader.next();
                String text = textAcc.getVal(doc).getText();

                // putNextEntry implicitly closes the previous entry, so no closeEntry() is needed.
                zipOut.putNextEntry(new ZipEntry(labelAcc.getVal(doc).getText()));
                zipOut.write(text.getBytes("UTF-8"));

                numDocs++;
                totalChars += text.length();
                if (0 == numDocs % PROGRESS_INTERVAL) {
                    reportProgress(numDocs, totalChars, System.currentTimeMillis() - startMs);
                }
            }
        }
        System.err.printf("Wrote %d documents to %s\n", Integer.valueOf(numDocs), outputFile);
    }

    /**
     * Prints one progress line to stderr.
     *
     * <p>The size is derived from the number of characters of document text, not from the number
     * of encoded bytes written. The throughput is computed in floating point from the unrounded
     * size and elapsed time, and is reported as 0 when no measurable time has elapsed, so this
     * method never divides by zero.
     *
     * @param numDocs number of documents processed so far
     * @param totalChars total number of characters of document text processed so far
     * @param elapsedMs milliseconds elapsed since processing started
     */
    private static void reportProgress(int numDocs, long totalChars, long elapsedMs) {
        long megabytes = (totalChars / 1024) / 1024;
        long seconds = elapsedMs / 1000;
        double rate = 0.0;
        if (elapsedMs > 0) {
            rate = (totalChars / (1024.0 * 1024.0)) / (elapsedMs / 1000.0);
        }
        System.err.printf("Read %d documents (%d MB) in %d sec (%1.2f MB/sec)...\n",
                Integer.valueOf(numDocs), Long.valueOf(megabytes), Long.valueOf(seconds),
                Double.valueOf(rate));
    }
}
