package com.ibm.avatar.algebra.oldscan;

import com.ibm.avatar.algebra.base.MemoizationTable;
import com.ibm.avatar.algebra.datamodel.AbstractTupleSchema;
import com.ibm.avatar.algebra.datamodel.FieldSetter;
import com.ibm.avatar.algebra.datamodel.ObjectID;
import com.ibm.avatar.algebra.datamodel.Tuple;
import com.ibm.avatar.algebra.scan.DocScanInternal;
import com.ibm.avatar.api.Constants;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;

/* loaded from: input_file:com/ibm/avatar/algebra/oldscan/TarFileScan.class */
/**
 * Document scan that reads its documents from a tar archive, producing one tuple per
 * non-directory archive entry.
 *
 * <p>The archive is opened in {@link #startScan(MemoizationTable)}. If the file name ends in
 * {@code "gz"} the file is additionally run through a {@link GZIPInputStream}. Entry contents
 * are decoded as UTF-8. When the schema contains the label column, the entry name is stored
 * in it.
 *
 * <p>The underlying streams are closed as soon as the end of the archive is reached, and also
 * when a scan is restarted before the previous one was exhausted.
 */
public class TarFileScan extends DocScanInternal {
    /** Name of the character encoding used to decode the contents of archive entries. */
    private static final String ENCODING_NAME = "UTF-8";

    /** File-name suffix that marks the archive as gzip-compressed. */
    private static final String GZIP_SUFFIX = "gz";

    /** The archive being scanned. */
    private final File tarfile;

    /** Tar stream over the archive; {@code null} before the scan starts and after it ends. */
    TarInputStream in;

    /** Reader that decodes {@link #in}; {@code null} before the scan starts and after it ends. */
    InputStreamReader isr;

    /** Entry that the next call to {@code getNextDoc} will return; {@code null} at end of archive. */
    private TarEntry curEntry;

    /** Numeric ID handed to the next document's {@link ObjectID}; starts at 1. */
    private int nextDummyDocID = 1;

    /** Setter for the label column; only assigned when the schema contains that column. */
    protected FieldSetter<String> docLabelAcc;

    /**
     * Creates a scan over the given archive using the schema returned by
     * {@code DocScanInternal.createLabeledSchema()}.
     *
     * @param file the tar (or gzip-compressed tar) archive to scan
     * @throws Exception if the schema cannot be created or the file does not exist
     */
    public TarFileScan(File file) throws Exception {
        this(file, DocScanInternal.createLabeledSchema());
    }

    /**
     * Creates a scan over the given archive with an explicit document schema.
     *
     * @param file the tar (or gzip-compressed tar) archive to scan
     * @param abstractTupleSchema schema of the document tuples to produce
     * @throws FileNotFoundException if {@code file} does not exist
     */
    public TarFileScan(File file, AbstractTupleSchema abstractTupleSchema) throws FileNotFoundException {
        if (!file.exists()) {
            throw new FileNotFoundException("Archive file " + file.getPath() + " not found");
        }
        this.tarfile = file;
        this.docSchema = abstractTupleSchema;
    }

    /**
     * Builds the document tuple for the current archive entry and then advances to the next
     * non-directory entry.
     *
     * @param memoizationTable table that is notified when the end of the archive is reached
     * @return the tuple for the current entry, with its text, optional label and object ID set
     * @throws RuntimeException if there is no current entry (scan not started or already exhausted)
     * @throws Exception if reading the entry or advancing the archive fails
     */
    @Override // com.ibm.avatar.algebra.scan.DocScanInternal
    protected Tuple getNextDoc(MemoizationTable memoizationTable) throws Exception {
        if (null == this.curEntry) {
            throw new RuntimeException("Read past end of archive");
        }
        String entryName = this.curEntry.getName();
        String docText = getDocText(this.isr);

        Tuple docTup = createOutputTup();
        this.docTextAcc.setVal(docTup, docText);
        if (this.docSchema.containsField(Constants.LABEL_COL_NAME)) {
            this.docLabelAcc.setVal(docTup, entryName);
        }
        docTup.setOid(new ObjectID("Document", this.nextDummyDocID++, true));

        // Position on the next entry now so end-of-input is flagged as soon as it is known.
        advanceToNextEntry(memoizationTable);
        return docTup;
    }

    /**
     * Moves {@link #curEntry} to the next non-directory entry of the archive. When no such
     * entry remains, flags end of input on the memoization table and closes the streams.
     *
     * @param memoizationTable table to notify when the archive is exhausted
     * @throws IOException if reading the archive or closing the streams fails
     */
    private void advanceToNextEntry(MemoizationTable memoizationTable) throws IOException {
        do {
            this.curEntry = this.in.getNextEntry();
        } while (null != this.curEntry && this.curEntry.isDirectory());

        if (null == this.curEntry) {
            // Flag end of input first so the state is recorded even if closing fails.
            memoizationTable.setEndOfInput();
            closeStreams();
        }
    }

    /**
     * Closes the reader and the tar stream, if open, and clears both fields. Safe to call
     * repeatedly.
     *
     * @throws IOException if closing the underlying stream fails
     */
    private void closeStreams() throws IOException {
        try {
            if (null != this.isr) {
                // Closing the reader also closes the tar stream it wraps.
                this.isr.close();
            } else if (null != this.in) {
                this.in.close();
            }
        } finally {
            this.isr = null;
            this.in = null;
        }
    }

    /**
     * Opens the archive and positions the scan on its first non-directory entry. Prints a
     * warning to standard error if the archive contains no such entry.
     *
     * @param memoizationTable table to notify if the archive turns out to be empty
     * @throws Exception if the archive cannot be opened or read
     */
    @Override // com.ibm.avatar.algebra.scan.DocScanInternal
    protected void startScan(MemoizationTable memoizationTable) throws Exception {
        // Release streams left over from a previous scan that was not read to the end.
        closeStreams();

        InputStream raw = new FileInputStream(this.tarfile);
        boolean opened = false;
        try {
            if (this.tarfile.getName().endsWith(GZIP_SUFFIX)) {
                raw = new GZIPInputStream(raw);
            }
            TarInputStream tarStream = new TarInputStream(raw);
            InputStreamReader reader = new InputStreamReader(tarStream, ENCODING_NAME);
            this.in = tarStream;
            this.isr = reader;
            opened = true;
        } finally {
            if (!opened) {
                // A wrapper failed to initialise (e.g. not actually gzip data); do not leak
                // the file handle. The original exception is the one worth propagating.
                try {
                    raw.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failure to close here.
                }
            }
        }

        advanceToNextEntry(memoizationTable);
        if (null == this.curEntry) {
            System.err.printf("Warning: Tarfile %s is empty\n", this.tarfile);
        }
    }

    /**
     * Sets up the accessors for the text column and, when present in the schema, the label
     * column, then returns the document schema as this operator's output schema.
     *
     * <p>Note: this method is public here; the decompiler reported that the access modifier
     * was changed from {@code protected}.
     *
     * @return the document schema passed to the constructor
     */
    @Override // com.ibm.avatar.algebra.scan.DocScanInternal, com.ibm.avatar.algebra.base.Operator
    public AbstractTupleSchema createOutputSchema() {
        String textColName = getTextColName();
        this.docTextAcc = this.docSchema.textSetter(textColName);
        this.docTextGetters.add(this.docSchema.textAcc(textColName));
        if (this.docSchema.containsField(Constants.LABEL_COL_NAME)) {
            this.docLabelAcc = this.docSchema.textSetter(Constants.LABEL_COL_NAME);
            this.docTextGetters.add(this.docSchema.textAcc(Constants.LABEL_COL_NAME));
        }
        return this.docSchema;
    }

    /**
     * Intentionally does nothing: end of input is flagged eagerly while advancing through the
     * archive, so there is nothing left to check here.
     *
     * @param memoizationTable unused
     */
    @Override // com.ibm.avatar.algebra.scan.DocScanInternal
    protected void reallyCheckEndOfInput(MemoizationTable memoizationTable) throws Exception {
    }
}
