package com.ibm.avatar.algebra.oldscan;

import com.ibm.avatar.algebra.base.MemoizationTable;
import com.ibm.avatar.algebra.datamodel.AbstractTupleSchema;
import com.ibm.avatar.algebra.datamodel.FieldSetter;
import com.ibm.avatar.algebra.datamodel.Tuple;
import com.ibm.avatar.algebra.scan.DocScanInternal;
import com.ibm.avatar.api.Constants;
import com.ibm.avatar.logging.Log;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Pattern;

/* loaded from: input_file:com/ibm/avatar/algebra/oldscan/DirDocScan.class */
/**
 * Document scan that walks a directory tree and emits one tuple per regular file found.
 *
 * <p>The traversal is driven by a stack of look-ahead iterators, one per directory currently being
 * visited. The entries of each directory are visited in the natural {@link File} sort order.
 * Directories whose name matches {@link #DIR_SKIP_NAME_REGEX} are not descended into. When the
 * document schema contains the label column, the label is set to the file's path relative to the
 * root directory, using {@code '/'} as the separator.
 */
public class DirDocScan extends DocScanInternal {
    /** Character encoding used to decode every file that is read. */
    public static final String FILE_ENCODING = "UTF-8";

    /** Directories whose name matches this pattern in full are never descended into. */
    private static final Pattern DIR_SKIP_NAME_REGEX = Pattern.compile("\\.svn");

    /** Compile-time switch for the traversal trace messages. */
    private static final boolean DEBUG = false;

    /** Root directory of the scan. */
    private final File dir;

    /** Stack of directory iterators; the last element belongs to the directory being visited. */
    protected ArrayList<LookaheadItr<File>> filestack;

    /** Setter for the label column; only initialized when the schema contains that column. */
    protected FieldSetter<String> docLabelAcc;

    /**
     * Creates a scan over {@code file} using the labeled document schema.
     *
     * @param file root directory to scan
     * @throws IOException if {@code file} is not a directory
     */
    public DirDocScan(File file) throws IOException {
        this(file, DocScanInternal.createLabeledSchema());
    }

    /**
     * Creates a scan over {@code file} that produces tuples of the given schema.
     *
     * @param file root directory to scan
     * @param abstractTupleSchema schema of the document tuples to produce
     * @throws IOException if {@code file} is not a directory
     */
    public DirDocScan(File file, AbstractTupleSchema abstractTupleSchema) throws IOException {
        this.filestack = new ArrayList<>();
        if (!file.isDirectory()) {
            throw new IOException(file + " is not a directory");
        }
        this.dir = file;
        this.docSchema = abstractTupleSchema;
    }

    /**
     * Sets up the accessors for the text column and, if present in the schema, the label column.
     *
     * @return the document schema passed to the constructor
     */
    @Override
    public AbstractTupleSchema createOutputSchema() {
        this.docTextAcc = this.docSchema.textSetter(getTextColName());
        this.docTextGetters.add(this.docSchema.textAcc(getTextColName()));
        if (this.docSchema.containsField(Constants.LABEL_COL_NAME)) {
            this.docLabelAcc = this.docSchema.textSetter(Constants.LABEL_COL_NAME);
            this.docTextGetters.add(this.docSchema.textAcc(Constants.LABEL_COL_NAME));
        }
        return this.docSchema;
    }

    /**
     * Reads the file the traversal currently points to, wraps it in a tuple and moves the
     * traversal on to the next regular file.
     *
     * @param memoizationTable table that is notified once the input is exhausted
     * @return tuple holding the file content and, if the schema has a label column, its label
     * @throws Exception if the file cannot be opened or read
     */
    @Override
    protected Tuple getNextDoc(MemoizationTable memoizationTable) throws Exception {
        File current = this.filestack.get(this.filestack.size() - 1).peek();

        // try-with-resources guarantees the stream is released even when reading fails.
        String docText;
        try (InputStreamReader reader =
                new InputStreamReader(new FileInputStream(current), FILE_ENCODING)) {
            docText = getDocText(reader);
        }

        Tuple outputTup = createOutputTup();
        this.docTextAcc.setVal(outputTup, docText);
        if (this.docSchema.containsField(Constants.LABEL_COL_NAME)) {
            this.docLabelAcc.setVal(outputTup, relativePath(this.dir, current));
        }
        advanceToNextFile(memoizationTable, false);
        return outputTup;
    }

    /**
     * Computes the path of {@code file2} relative to {@code file} by stripping the absolute path
     * of {@code file} from the front of the absolute path of {@code file2}.
     *
     * @param file base directory
     * @param file2 file located below the base directory
     * @return the remainder of the path, with the platform separator replaced by {@code '/'}
     */
    private String relativePath(File file, File file2) {
        String remainder = file2.getAbsolutePath().substring(file.getAbsolutePath().length());
        if (File.separatorChar == '/') {
            return remainder;
        }
        return remainder.replace(File.separatorChar, '/');
    }

    /**
     * Moves the traversal to the next regular file, descending into directories and popping
     * exhausted ones, and flags end of input once the stack runs empty.
     *
     * @param memoizationTable table that is notified once the input is exhausted
     * @param z {@code true} if the entry the top iterator currently points to has not been
     *     examined yet and must therefore not be stepped over
     */
    private void advanceToNextFile(MemoizationTable memoizationTable, boolean z) {
        boolean stayOnCurrent = z;
        LookaheadItr<File> current = this.filestack.get(this.filestack.size() - 1);
        while (true) {
            if (!current.hasNext()) {
                // Directory exhausted: resume with its parent, or stop if it was the root.
                this.filestack.remove(this.filestack.size() - 1);
                if (this.filestack.isEmpty()) {
                    memoizationTable.setEndOfInput();
                    return;
                }
                current = this.filestack.get(this.filestack.size() - 1);
                continue;
            }

            if (!stayOnCurrent) {
                current.next();
            }
            // The flag only covers a single entry. Clearing it here keeps the loop from
            // spinning forever on an entry that is neither a directory nor a regular file.
            stayOnCurrent = false;

            if (!current.hasNext()) {
                continue;
            }
            if (current.peek().isDirectory()) {
                LookaheadItr<File> parent = current;
                current = pushDir(parent);
                // A freshly pushed directory starts on its first, still unexamined entry.
                stayOnCurrent = !current.equals(parent);
            }
            if (current.hasNext() && current.peek().isFile()) {
                return;
            }
        }
    }

    /**
     * Opens the directory the given iterator points to and pushes an iterator over its sorted
     * entries onto the stack.
     *
     * @param lookaheadItr iterator whose current element is the directory to open
     * @return the newly pushed iterator, or {@code lookaheadItr} itself when the directory is
     *     skipped by name, cannot be listed, or is empty
     */
    private LookaheadItr<File> pushDir(LookaheadItr<File> lookaheadItr) {
        File directory = lookaheadItr.peek();
        if (DEBUG) {
            Log.debug("Scanning directory %s", directory);
        }

        if (DIR_SKIP_NAME_REGEX.matcher(directory.getName()).matches()) {
            if (DEBUG) {
                Log.debug("DirDocScan skipping directory %s.", directory);
            }
            return lookaheadItr;
        }

        // listFiles() yields null when the directory cannot be read; treat that like "empty".
        File[] children = directory.listFiles();
        if (children == null || children.length == 0) {
            return lookaheadItr;
        }

        ArrayList<File> sorted = new ArrayList<>(children.length);
        Collections.addAll(sorted, children);
        if (DEBUG) {
            Log.debug("Before sorting: %s", sorted);
        }
        Collections.sort(sorted);
        if (DEBUG) {
            Log.debug("After sorting: %s", sorted);
        }

        LookaheadItr<File> pushed = newLookahead(sorted);
        this.filestack.add(pushed);
        if (DEBUG) {
            Log.debug("After stack insertion, iterator points to: %s", pushed.peek());
        }
        return pushed;
    }

    /**
     * Wraps the given files in a look-ahead iterator.
     *
     * <p>The type parameters of {@code LookaheadItrImpl} are not visible from this file, so the
     * class is instantiated raw and the warning is confined to this helper.
     *
     * @param files files to iterate over
     * @return look-ahead iterator positioned on the first file
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static LookaheadItr<File> newLookahead(ArrayList<File> files) {
        return new LookaheadItrImpl(files.iterator());
    }

    /**
     * Positions the traversal on the first regular file below the root directory.
     *
     * @param memoizationTable table that is notified if the tree contains no file at all
     * @throws Exception declared by the overridden method
     */
    @Override
    protected void startScan(MemoizationTable memoizationTable) throws Exception {
        ArrayList<File> rootOnly = new ArrayList<>();
        rootOnly.add(this.dir);
        LookaheadItr<File> rootItr = newLookahead(rootOnly);
        this.filestack.add(rootItr);

        // If the first entry is not a regular file, search on without stepping over it.
        if (!pushDir(rootItr).peek().isFile()) {
            advanceToNextFile(memoizationTable, true);
        }
    }

    /**
     * Flags end of input when no directory iterator is left on the stack.
     *
     * @param memoizationTable table that is notified when the input is exhausted
     * @throws Exception declared by the overridden method
     */
    @Override
    protected void reallyCheckEndOfInput(MemoizationTable memoizationTable) throws Exception {
        if (this.filestack.isEmpty()) {
            memoizationTable.setEndOfInput();
        }
    }
}
