/*
 * Decompiled with CFR 0.152.
 */
package org.maochen.nlp.ml.util.dataio;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.vector.FeatNamedVector;
import org.maochen.nlp.ml.vector.IVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads a delimited text file with a header row and converts each record into a
 * {@link Tuple} whose vector is a {@link FeatNamedVector}.
 *
 * <p>One column is used as the label; columns listed in {@code ignoredColumns} are skipped;
 * every remaining column becomes one feature. Cells that parse as a {@code double} are used
 * as-is; any other cell is encoded as {@code 1.0} (or {@code 0.0} when blank) and its
 * lower-cased, trimmed text is appended to the feature name.
 */
public class CSVDataReader {
    private static final Logger LOG = LoggerFactory.getLogger(CSVDataReader.class);
    private final String filename;
    private final String delim;
    /** Zero-based label column; {@code -1} means "last column of the first record" (resolved on read). */
    int labelCol;
    /** Header names ordered by column index; populated by {@link #read(InputStream)}. */
    String[] header = null;
    /** Zero-based indices of columns that are neither label nor feature. */
    Set<Integer> ignoredColumns = new HashSet<Integer>();
    /** Stored from the constructor; not consulted anywhere in this class. */
    int posNegIndex = -1;

    /**
     * Reads the file given to the constructor.
     *
     * @return one tuple per data record, in file order
     * @throws IOException if the file cannot be opened or read
     */
    public List<Tuple> read() throws IOException {
        // This method owns the stream, so it must close it; the original leaked the handle.
        try (InputStream in = new FileInputStream(this.filename)) {
            return this.read(in);
        }
    }

    /**
     * Converts one record into a labelled feature vector.
     *
     * <p>Feature values are packed contiguously: the k-th non-label, non-ignored column is
     * stored at index k of the vector and of {@code featsName}. {@code featsName} is allocated
     * with {@code record.size()} entries, so entries past the last feature stay {@code null}.
     *
     * @param record the parsed record
     * @return a tuple whose label is the text of the label column
     */
    protected Tuple extractValuedFeat(CSVRecord record) {
        int columnCount = record.size();

        // Count the columns that really are features. Deriving the size from
        // ignoredColumns.size() is wrong when that set holds the label column or
        // indices outside this record.
        int featureCount = 0;
        for (int col = 0; col < columnCount; ++col) {
            if (this.isFeatureColumn(col)) {
                ++featureCount;
            }
        }

        FeatNamedVector featNamedVector = new FeatNamedVector(new double[featureCount]);
        featNamedVector.featsName = new String[columnCount];
        Tuple tuple = new Tuple((IVector)featNamedVector);
        tuple.label = record.get(this.labelCol);

        // Write position inside the packed vector. Indexing by the raw column number
        // overruns the array as soon as a column is skipped before the last feature.
        int slot = 0;
        for (int col = 0; col < columnCount; ++col) {
            if (!this.isFeatureColumn(col)) {
                continue;
            }
            // The header may be unset (method called before read) or shorter than the record.
            String name = this.header != null && col < this.header.length ? this.header[col] : null;
            String raw = record.get(col);
            double value;
            try {
                value = Double.parseDouble(raw);
            }
            catch (NumberFormatException e) {
                // Non-numeric cell: treat it as a categorical indicator.
                if (name != null) {
                    name = name + "_" + raw.toLowerCase().trim();
                }
                value = raw.trim().isEmpty() ? 0.0 : 1.0;
            }
            featNamedVector.featsName[slot] = name;
            tuple.vector.getVector()[slot] = value;
            ++slot;
        }
        return tuple;
    }

    /** Returns true when {@code col} is neither the label column nor an ignored column. */
    private boolean isFeatureColumn(int col) {
        return col != this.labelCol && !this.ignoredColumns.contains(col);
    }

    /**
     * Parses delimited data from {@code is}; the first line is taken as the header.
     *
     * <p>Side effects: sets {@code header}, and replaces a {@code labelCol} of {@code -1}
     * with the index of the last column of the first record. The stream is not closed here;
     * the caller remains responsible for it.
     *
     * <p>NOTE(review): bytes are decoded with the platform default charset, as before.
     *
     * @param is source of the delimited text
     * @return one tuple per data record, in input order; empty when there are no data records
     * @throws IOException if reading or parsing fails
     */
    public List<Tuple> read(InputStream is) throws IOException {
        CSVFormat format = CSVFormat.RFC4180.withHeader(new String[0]).withDelimiter(this.delim.charAt(0));
        CSVParser csvParser = new CSVParser(new InputStreamReader(is), format);
        List<CSVRecord> records = csvParser.getRecords();

        // Header names ordered by their column index.
        Map<String, Integer> headerMap = csvParser.getHeaderMap();
        this.header = headerMap == null
                ? new String[0]
                : headerMap.entrySet().stream()
                        .sorted(Map.Entry.comparingByValue())
                        .map(Map.Entry::getKey)
                        .toArray(String[]::new);

        // Only resolve the default label column when there is a record to measure;
        // a header-only input now yields an empty list instead of throwing.
        if (this.labelCol == -1 && !records.isEmpty()) {
            this.labelCol = records.get(0).size() - 1;
        }

        // Records are converted in parallel; collect() keeps encounter order.
        return records.stream().parallel().map(this::extractValuedFeat).collect(Collectors.toList());
    }

    /**
     * @param filename       path of the file read by {@link #read()}
     * @param labelCol       zero-based label column, or {@code -1} for the last column
     * @param delim          delimiter; only its first character is used
     * @param ignoredColumns zero-based columns to skip; {@code null} means none
     * @param posNegIndex    stored as-is; not used by this class
     */
    public CSVDataReader(String filename, int labelCol, String delim, Set<Integer> ignoredColumns, int posNegIndex) {
        this.filename = filename;
        this.labelCol = labelCol;
        this.delim = delim;
        this.posNegIndex = posNegIndex;
        if (ignoredColumns != null) {
            this.ignoredColumns = ignoredColumns;
        }
    }
}

