package com.ibm.avatar.api;

import com.ibm.avatar.algebra.base.MemoizationTable;
import com.ibm.avatar.algebra.datamodel.FieldGetter;
import com.ibm.avatar.algebra.datamodel.Pair;
import com.ibm.avatar.algebra.datamodel.Text;
import com.ibm.avatar.algebra.datamodel.TextGetter;
import com.ibm.avatar.algebra.datamodel.Tuple;
import com.ibm.avatar.algebra.datamodel.TupleList;
import com.ibm.avatar.algebra.datamodel.TupleSchema;
import com.ibm.avatar.algebra.oldscan.CsvFileScan;
import com.ibm.avatar.algebra.oldscan.DBDumpFileScan;
import com.ibm.avatar.algebra.oldscan.DirDocScan;
import com.ibm.avatar.algebra.oldscan.JsonFileScan;
import com.ibm.avatar.algebra.oldscan.TarFileScan;
import com.ibm.avatar.algebra.oldscan.TextFileScan;
import com.ibm.avatar.algebra.oldscan.ZipFileScan;
import com.ibm.avatar.algebra.scan.DocScanInternal;
import com.ibm.avatar.algebra.util.lang.LangCode;
import com.ibm.avatar.api.exceptions.ExceptionWithView;
import com.ibm.avatar.api.exceptions.TextAnalyticsException;
import com.ibm.avatar.logging.Log;
import java.io.File;
import java.util.Iterator;
import java.util.Map;

/* loaded from: input_file:com/ibm/avatar/api/DocReader.class */
public class DocReader {
    /** Underlying document scan built by {@code DocScanInternal.makeFileScan}; set to null by {@link #remove()}. */
    protected DocScanInternal scan;
    /** Document schema as reported by {@code scan.getDocSchema()} at construction time. */
    private TupleSchema docSchema;
    /** Memoization table created over {@link #scan}; drives {@link #hasNext()} and {@link #next()}; null after {@link #remove()}. */
    private MemoizationTable mt;

    /** Accessor for the "text" field of the scan's output schema; may be null when built via the schema-taking constructors. */
    @Deprecated
    private FieldGetter<Text> textAcc;

    /** Accessor for the {@code Constants.LABEL_COL_NAME} field of the scan's output schema, or null if that field is absent. */
    @Deprecated
    private FieldGetter<Text> labelAcc;
    /** External view schemas obtained from a {@code JsonFileScan}; stays null for every other scan type. */
    private Map<String, TupleSchema> extViewSchemas;

    /** Per-field document accessors obtained from a {@code JsonFileScan} or {@code CsvFileScan}; null otherwise. */
    @Deprecated
    private Map<String, FieldGetter<?>> docGetters;

    /** Accessors for external view fields obtained from a {@code JsonFileScan}; looked up by two string keys (presumably view name, then field name). */
    @Deprecated
    private Map<String, Map<String, FieldGetter<?>>> allExtViewGetters;
    /** External view schema map exactly as passed to the constructor; null for the single-argument constructor. */
    private Map<Pair<String, String>, TupleSchema> extNameVsSchema;
    /** The file (or directory) handed to the constructor; re-read by {@link #size()}. */
    protected File docFile;
    /** Input format derived from the concrete scan type by {@code setInputFormat()}. */
    private DataFormat inputFormat;
    /** Field separator passed to the scan factory by the four-argument constructor; ',' by default. */
    private char csvFieldSeparator;

    /**
     * Kinds of input document collection that a reader can report.
     *
     * <p>Each constant carries a human-readable description, which is what
     * {@link #toString()} returns.
     *
     * <p>NOTE(review): the decompiler recorded that this enum's access modifier was
     * changed from {@code private}; it is kept {@code public} here to match the
     * decompiled source.
     */
    public enum DataFormat {
        TEXT("Text file"),
        DIRECTORY("Directory of text files"),
        ARCHIVE("Compressed archive of text files"),
        DB2_DUMP("DB2/Derby export format"),
        CSV("Comma-separated-values format"),
        JSON("JSON format");

        /** Human-readable description of the format. */
        private final String description;

        DataFormat(String description) {
            this.description = description;
        }

        /** Returns the human-readable description rather than the constant name. */
        @Override
        public String toString() {
            return description;
        }
    }

    /**
     * Creates an iterator over the "text" field of every document read from the given file.
     *
     * <p>The returned iterator is backed by a freshly created {@code DocReader}. Its
     * {@code remove()} does not remove an element; it closes the backing reader by
     * delegating to {@link #remove()}.
     *
     * @param file document collection to read
     * @return iterator yielding the text of each document in scan order
     * @throws TextAnalyticsException if the backing reader cannot be created
     */
    public static Iterator<String> makeDocTextItr(File file) throws TextAnalyticsException {
        // This is a static method, so there is no enclosing DocReader instance; the anonymous
        // iterator has to capture the reader through a final local variable instead.
        final DocReader docReader = new DocReader(file);
        final TextGetter textAcc = docReader.getDocSchema().textAcc("text");
        return new Iterator<String>() {
            @Override
            public boolean hasNext() {
                return docReader.hasNext();
            }

            @Override
            public String next() {
                return textAcc.getVal(docReader.next()).getText();
            }

            @Override
            public void remove() {
                docReader.remove();
            }
        };
    }

    /**
     * Creates an iterator over (label, text) pairs for every document read from the given file.
     *
     * <p>The first element of each pair is the document's label and the second is its text.
     * When the document schema has no label column, a synthetic label of the form
     * {@code "Document N"} is used, where N is the 1-based position of the document.
     *
     * <p>The returned iterator's {@code remove()} does not remove an element; it closes the
     * backing reader by delegating to {@link #remove()}.
     *
     * @param file document collection to read
     * @return iterator yielding one (label, text) pair per document in scan order
     * @throws TextAnalyticsException if the backing reader cannot be created
     */
    public static Iterator<Pair<String, String>> makePairsItr(File file) throws TextAnalyticsException {
        // This is a static method, so there is no enclosing DocReader instance; the anonymous
        // iterator has to capture the reader through a final local variable instead.
        final DocReader docReader = new DocReader(file);
        final TupleSchema schema = docReader.getDocSchema();
        final TextGetter textGetter = schema.textAcc("text");
        // The label column is optional. Guard the lookup the same way the constructors do, so a
        // schema without a label falls back to the synthetic label below.
        final TextGetter labelGetter =
                schema.containsField(Constants.LABEL_COL_NAME) ? schema.textAcc(Constants.LABEL_COL_NAME) : null;
        return new Iterator<Pair<String, String>>() {
            /** Number of documents returned so far; used to build synthetic labels. */
            int docCount = 0;

            @Override
            public boolean hasNext() {
                return docReader.hasNext();
            }

            @Override
            public Pair<String, String> next() {
                Tuple docTup = docReader.next();
                this.docCount++;
                String label = (null == labelGetter)
                        ? String.format("Document %d", Integer.valueOf(this.docCount))
                        : ((Text) labelGetter.getVal(docTup)).getText();
                String text = ((Text) textGetter.getVal(docTup)).getText();
                return new Pair<>(label, text);
            }

            @Override
            public void remove() {
                docReader.remove();
            }
        };
    }

    /**
     * Creates an iterator over (document tuple, external view tuples) pairs for a document
     * collection.
     *
     * <p>Each call to {@code next()} reads the next document tuple and pairs it with whatever
     * {@link #getExtViewTups()} reports at that point. The returned iterator's {@code remove()}
     * does not remove an element; it closes the backing reader by delegating to
     * {@link #remove()}.
     *
     * @param str location of the document collection; NOTE(review): interpreted here as a file
     *        system path — confirm whether callers pass a path or a URI string
     * @param tupleSchema document schema forwarded to the reader's constructor
     * @param map external view schema map forwarded to the reader's constructor
     * @return iterator yielding one pair per document in scan order
     * @throws TextAnalyticsException if the backing reader cannot be created
     */
    public static Iterator<Pair<Tuple, Map<String, TupleList>>> makeDocandExternalPairsItr(String str, TupleSchema tupleSchema, Map<Pair<String, String>, TupleSchema> map) throws TextAnalyticsException {
        // This is a static method, so there is no enclosing DocReader instance. Build the reader
        // from the arguments and let the anonymous iterator capture it through a final local.
        final DocReader docReader = new DocReader(new File(str), tupleSchema, map);
        return new Iterator<Pair<Tuple, Map<String, TupleList>>>() {
            @Override
            public boolean hasNext() {
                return docReader.hasNext();
            }

            @Override
            public Pair<Tuple, Map<String, TupleList>> next() {
                // Read the document first: the first constructor argument is evaluated first,
                // matching the evaluation order of the original single expression.
                Tuple docTup = docReader.next();
                return new Pair<>(docTup, docReader.getExtViewTups());
            }

            @Override
            public void remove() {
                docReader.remove();
            }
        };
    }

    /**
     * Opens a reader over the given document collection using the scan's own schema.
     *
     * <p>The scan must expose a "text" field; otherwise construction fails. The label accessor
     * is populated only when the scan's output schema has a {@code Constants.LABEL_COL_NAME}
     * field.
     *
     * @param file document collection to read
     * @throws TextAnalyticsException wrapping any failure raised while setting up the scan,
     *         including a missing text accessor
     */
    public DocReader(File file) throws TextAnalyticsException {
        this.inputFormat = DataFormat.TEXT;
        this.csvFieldSeparator = ',';
        try {
            this.docFile = file;
            this.scan = DocScanInternal.makeFileScan(file);
            setInputFormat();
            this.mt = new MemoizationTable(this.scan);

            // The text accessor is mandatory for this constructor.
            if (this.scan.getOutputSchema().containsField("text")) {
                this.textAcc = this.scan.getOutputSchema().textAcc("text");
            }
            if (this.textAcc == null) {
                throw new RuntimeException("Scan returned NULL for text accessor!");
            }

            this.docSchema = this.scan.getDocSchema();

            // The label accessor is optional.
            this.labelAcc = this.scan.getOutputSchema().containsField(Constants.LABEL_COL_NAME)
                    ? this.scan.getOutputSchema().textAcc(Constants.LABEL_COL_NAME)
                    : null;
        } catch (Throwable cause) {
            throw TextAnalyticsException.convertToTextAnalyticsException(cause, TextAnalyticsException.ExceptionType.RUNTIME_ERROR);
        }
    }

    /**
     * Opens a reader with an explicit document schema and external view schemas, using a
     * comma as the CSV field separator.
     *
     * @param file document collection to read
     * @param tupleSchema document schema forwarded to the four-argument constructor
     * @param map external view schema map forwarded to the four-argument constructor
     * @throws TextAnalyticsException if the four-argument constructor fails
     */
    public DocReader(File file, TupleSchema tupleSchema, Map<Pair<String, String>, TupleSchema> map) throws TextAnalyticsException {
        this(file, tupleSchema, map, ',');
    }

    public DocReader(File file, TupleSchema tupleSchema, Map<Pair<String, String>, TupleSchema> map, char c) throws TextAnalyticsException {
        this.inputFormat = DataFormat.TEXT;
        this.csvFieldSeparator = ',';
        try {
            this.docFile = file;
            this.extNameVsSchema = map;
            this.csvFieldSeparator = c;
            this.scan = DocScanInternal.makeFileScan(file, tupleSchema, map, c);
            setInputFormat();
            this.mt = new MemoizationTable(this.scan);
            this.docSchema = this.scan.getDocSchema();
            if (this.scan instanceof JsonFileScan) {
                this.inputFormat = DataFormat.JSON;
                this.extViewSchemas = ((JsonFileScan) this.scan).getExtViewSchemas();
                this.docGetters = ((JsonFileScan) this.scan).getDocAcc();
                this.allExtViewGetters = ((JsonFileScan) this.scan).getAllExtViewAcc();
            } else {
                if (map != null) {
                    Log.info("Warning: external view schema passed into DocReader, which does not support external views for this input data collection type.  This parameter will be ignored.", new Object[0]);
                }
                if (this.scan instanceof CsvFileScan) {
                    this.docGetters = ((CsvFileScan) this.scan).getDocAcc();
                }
            }
            if (this.scan.getOutputSchema().containsField("text")) {
                this.textAcc = this.scan.getOutputSchema().textAcc("text");
            } else {
                this.textAcc = null;
            }
            if (this.scan.getOutputSchema().containsField(Constants.LABEL_COL_NAME)) {
                this.labelAcc = this.scan.getOutputSchema().textAcc(Constants.LABEL_COL_NAME);
            } else {
                this.labelAcc = null;
            }
        } catch (Throwable th) {
            throw TextAnalyticsException.convertToTextAnalyticsException(th, TextAnalyticsException.ExceptionType.RUNTIME_ERROR);
        }
    }

    /**
     * Reports whether more documents can be read.
     *
     * @return true if the memoization table reports more input; false once the reader has been
     *         closed via {@link #remove()}
     */
    public boolean hasNext() {
        // remove() nulls out the memoization table; a closed reader simply has nothing left
        // rather than failing with a NullPointerException.
        return this.mt != null && this.mt.haveMoreInput();
    }

    /**
     * Reads the next document tuple from the scan.
     *
     * @return the next document tuple
     * @throws ExceptionWithView if reading fails; a failure that is not already an
     *         {@code ExceptionWithView} is wrapped in one tagged with the view name "Document"
     */
    public Tuple next() {
        try {
            return this.scan.getNextDocTup(this.mt);
        } catch (Throwable failure) {
            // Pass through exceptions that already carry view information; wrap everything else.
            if (failure instanceof ExceptionWithView) {
                throw (ExceptionWithView) failure;
            }
            throw new ExceptionWithView(failure, "Document");
        }
    }

    /**
     * Closes this reader: closes the memoization table's outputs and drops the references to
     * the scan and the table.
     *
     * <p>Despite the name, this does not remove a document. Calling it more than once is
     * harmless.
     */
    public void remove() {
        // Guard against a repeated call: the table is already null after the first close.
        if (this.mt != null) {
            this.mt.closeOutputs();
        }
        this.scan = null;
        this.mt = null;
    }

    /**
     * Returns the accessor for the "text" field of the scan's output schema.
     *
     * @return the text accessor; may be null when the reader was built with an explicit schema
     *         whose output has no "text" field
     */
    @Deprecated
    public FieldGetter<Text> getTextAcc() {
        return this.textAcc;
    }

    /**
     * Returns the accessor for the {@code Constants.LABEL_COL_NAME} field of the scan's output
     * schema.
     *
     * @return the label accessor, or null if the output schema has no such field
     */
    @Deprecated
    public FieldGetter<Text> getLabelAcc() {
        return this.labelAcc;
    }

    /**
     * Looks up the accessor registered for a document field.
     *
     * @param str key of the accessor to look up (presumably the document field name)
     * @return the accessor stored under {@code str}, or null if the map has no such key
     * @throws RuntimeException if no document field accessors were set up for this reader
     */
    @Deprecated
    public FieldGetter<?> getDocFieldAcc(String str) {
        if (this.docGetters == null) {
            throw new RuntimeException("Document field accessors not initialized.");
        }
        return this.docGetters.get(str);
    }

    /**
     * Looks up the accessor registered for a field of an external view.
     *
     * @param str first-level key (presumably the external view name)
     * @param str2 second-level key (presumably the field name within that view)
     * @return the matching accessor, or null if either key is unknown
     * @throws RuntimeException if no external view accessors were set up for this reader
     */
    @Deprecated
    public FieldGetter<?> getExtViewFieldAcc(String str, String str2) {
        if (this.allExtViewGetters == null) {
            throw new RuntimeException("External view accessors not initialized.");
        }
        // An unknown first-level key yields null instead of a NullPointerException, mirroring
        // what getDocFieldAcc does for an unknown document field.
        Map<String, FieldGetter<?>> viewGetters = this.allExtViewGetters.get(str);
        return (viewGetters == null) ? null : viewGetters.get(str2);
    }

    /**
     * Returns the document schema captured from the scan when this reader was constructed.
     *
     * @return the document schema
     */
    public TupleSchema getDocSchema() {
        return this.docSchema;
    }

    /**
     * Looks up the schema stored for an external view.
     *
     * @param str key of the schema to look up (presumably the external view name)
     * @return the schema stored under {@code str}; null if the key is unknown or if this
     *         reader has no external view schemas at all
     */
    public TupleSchema getExternalViewSchema(String str) {
        return (this.extViewSchemas != null) ? this.extViewSchemas.get(str) : null;
    }

    /**
     * Returns the field names of the scan's output schema.
     *
     * @return the field names reported by the scan's output schema
     * @throws RuntimeException if the scan is not available (for example after
     *         {@link #remove()})
     */
    public String[] getDocSchemaFields() {
        if (this.scan == null) {
            throw new RuntimeException("Document scan not initialized.");
        }
        return this.scan.getOutputSchema().getFieldNames();
    }

    /**
     * Returns the field names of the schema stored for an external view.
     *
     * @param str key of the schema to look up (presumably the external view name)
     * @return the schema's field names; null if the key is unknown or if this reader has no
     *         external view schemas at all
     */
    public String[] getExternalViewSchemaFields(String str) {
        if (this.extViewSchemas == null) {
            return null;
        }
        // An unknown key yields null instead of a NullPointerException, consistent with the
        // null already returned when no external view schemas exist.
        TupleSchema viewSchema = this.extViewSchemas.get(str);
        return (viewSchema == null) ? null : viewSchema.getFieldNames();
    }

    /**
     * Returns the external view tuples reported by the underlying scan.
     *
     * <p>Only a {@code JsonFileScan} can supply external view tuples. For any other scan type
     * the result is null when no external view schemas were passed to the constructor, and an
     * error when they were.
     *
     * @return the external view tuples from the JSON scan, or null as described above
     * @throws RuntimeException wrapping the underlying failure, including the case where
     *         external view schemas were supplied for a non-JSON input
     */
    public Map<String, TupleList> getExtViewTups() {
        try {
            if (!(this.scan instanceof JsonFileScan)) {
                if (this.extNameVsSchema == null) {
                    return null;
                }
                throw new Exception("External view tuples can only be retrieved from input document collections in the JSON format.");
            }
            return ((JsonFileScan) this.scan).getExtViewTups();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Counts the documents in the collection by reading it end to end with a second,
     * temporary reader.
     *
     * <p>The temporary reader is built with the constructor matching the file name's
     * extension ({@code Constants.JSON_EXTENSION}, {@code Constants.CSV_EXTENSION}, or anything
     * else) and is always closed afterwards. This reader's own position is not advanced.
     *
     * @return the number of documents read by the temporary reader
     * @throws TextAnalyticsException if the temporary reader cannot be created
     */
    public int size() throws TextAnalyticsException {
        DocReader counter = null;
        try {
            final String path = this.docFile.toString();
            if (path.endsWith(Constants.JSON_EXTENSION)) {
                counter = new DocReader(this.docFile, this.scan.getDocSchema(), this.extNameVsSchema);
            } else if (path.endsWith(Constants.CSV_EXTENSION)) {
                counter = new DocReader(this.docFile, this.scan.getDocSchema(), null, this.csvFieldSeparator);
            } else {
                counter = new DocReader(this.docFile);
            }

            int count = 0;
            while (counter.hasNext()) {
                counter.next();
                count++;
            }
            return count;
        } finally {
            if (counter != null) {
                counter.remove();
            }
        }
    }

    /**
     * Forwards a language override to the underlying scan.
     *
     * @param langCode language code passed to the scan's {@code overrideLang}
     * @throws RuntimeException if the scan is not available (for example after
     *         {@link #remove()})
     */
    public void overrideLanguage(LangCode langCode) {
        if (this.scan != null) {
            this.scan.overrideLang(langCode);
            return;
        }
        throw new RuntimeException("Attempted to set language override, but document scan not initialized.");
    }

    /**
     * Returns the human-readable description of the detected input format.
     *
     * @return the {@code toString()} value of the current {@code DataFormat}
     */
    public String getInputFormat() {
        return this.inputFormat.toString();
    }

    /**
     * Derives {@code inputFormat} from the concrete type of the underlying scan.
     *
     * <p>The type checks run in a fixed order: archive (zip or tar), directory, DB2 dump, JSON,
     * CSV, then plain text. The field is left untouched when the scan type is not recognised.
     *
     * @throws RuntimeException if the scan is null or of an unrecognised type
     */
    private void setInputFormat() {
        if (this.scan == null) {
            throw new RuntimeException("Attempted to determine input document collection format, but document scan not initialized.");
        }

        final DataFormat detected;
        if (this.scan instanceof ZipFileScan || this.scan instanceof TarFileScan) {
            detected = DataFormat.ARCHIVE;
        } else if (this.scan instanceof DirDocScan) {
            detected = DataFormat.DIRECTORY;
        } else if (this.scan instanceof DBDumpFileScan) {
            detected = DataFormat.DB2_DUMP;
        } else if (this.scan instanceof JsonFileScan) {
            detected = DataFormat.JSON;
        } else if (this.scan instanceof CsvFileScan) {
            detected = DataFormat.CSV;
        } else if (this.scan instanceof TextFileScan) {
            detected = DataFormat.TEXT;
        } else {
            throw new RuntimeException("Unknown document scan type, input document collection format could not be determined.");
        }
        this.inputFormat = detected;
    }
}
