/*
 * Decompiled with CFR 0.152.
 */
package gorsat.process;

import gorsat.Commands.CommandParseUtilities;
import gorsat.DynIterator;
import gorsat.process.GorDataType;
import gorsat.process.PNFilterFunction;
import gorsat.process.PipeInstance;
import gorsat.process.PipeOptions;
import gorsat.process.SparkRowSource;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.OptionalInt;
import java.util.Spliterators;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.DataFormatException;
import java.util.zip.GZIPInputStream;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
import org.apache.spark.sql.catalyst.encoders.RowEncoder;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.gorpipe.gor.binsearch.CompressionType;
import org.gorpipe.gor.binsearch.Unzipper;
import org.gorpipe.gor.model.FileReader;
import org.gorpipe.gor.model.ParquetLine;
import org.gorpipe.spark.GorSparkSession;
import org.gorpipe.spark.RowDataType;
import org.gorpipe.spark.RowGorRDD;
import org.gorpipe.util.collection.ByteArray;
import scala.collection.JavaConverters;
import scala.collection.Seq;

public class SparkRowUtilities {
    // File-name suffixes accepted by getFileEndingPredicate(); compared against the lower-cased name.
    static final String[] allowedGorSQLFileEndings = new String[]{".json", ".csv", ".tsv", ".gor", ".gorz", ".gor.gz", ".gord", ".txt", ".vcf", ".bgen"};
    // Format name handed to DataFrameReader.format(...) when reading delimited text files.
    static final String csvDataSource = "csv";
    // Class name of the GOR data source, handed to DataFrameReader.format(...).
    static final String gordatasourceClassname = "gorsat.spark.GorDataSource";
    // Shared 131072-byte (128 KiB) scratch buffer.
    // NOTE(review): mutable static state with no synchronization visible here; presumably used
    // while unzipping blocks - confirm against its users before touching it.
    static byte[] unzipBuffer = new byte[131072];

    /**
     * Builds a predicate that recognises tokens which look like data sources.
     *
     * <p>A token matches when its lower-cased form ends with one of
     * {@code allowedGorSQLFileEndings}, or when it starts with {@code "<("}.
     *
     * @return the predicate; it throws {@link NullPointerException} for a {@code null} token
     */
    public static Predicate<String> getFileEndingPredicate() {
        return candidate -> {
            String lowered = candidate.toLowerCase();
            boolean knownEnding = Arrays.stream(allowedGorSQLFileEndings).anyMatch(lowered::endsWith);
            return knownEnding || candidate.startsWith("<(");
        };
    }

    /**
     * Renders definitions and create statements as a single script prefix followed by
     * {@code creates}.
     *
     * <p>Each entry of {@code defMap} becomes {@code "def <key> = <value>"} and each entry of
     * {@code createMap} becomes {@code "create <key> = <value>"}. Within each group the statements
     * are separated by {@code "; "} and the group is terminated by {@code ";"}; an empty map
     * contributes nothing. The result is defs, then creates, then {@code creates} appended as is.
     *
     * @param createMap name to query for the create statements
     * @param defMap    name to value for the def statements
     * @param creates   text appended verbatim after the generated statements
     * @return the concatenated script text
     */
    public static String createMapString(Map<String, String> createMap, Map<String, String> defMap, String creates) {
        StringBuilder script = new StringBuilder();

        Iterator<Map.Entry<String, String>> defs = defMap.entrySet().iterator();
        while (defs.hasNext()) {
            Map.Entry<String, String> def = defs.next();
            script.append("def ").append(def.getKey()).append(" = ").append(def.getValue());
            script.append(defs.hasNext() ? "; " : ";");
        }

        Iterator<Map.Entry<String, String>> created = createMap.entrySet().iterator();
        while (created.hasNext()) {
            Map.Entry<String, String> create = created.next();
            script.append("create ").append(create.getKey()).append(" = ").append(create.getValue());
            script.append(created.hasNext() ? "; " : ";");
        }

        return script.append(creates).toString();
    }

    /**
     * Builds the list form of the script prefix: one element per statement.
     *
     * <p>The result holds a {@code "def <key> = <value>"} element per {@code defMap} entry, then a
     * {@code "create <key> = <value>"} element per {@code createMap} entry, then the pieces that
     * {@code CommandParseUtilities.quoteSafeSplitAndTrim} yields when splitting {@code creates}
     * on {@code ';'}.
     *
     * @param createMap name to query for the create statements
     * @param defMap    name to value for the def statements
     * @param creates   semicolon-separated statements appended after the generated ones
     * @return a new mutable list of statements
     */
    public static List<String> createMapList(Map<String, String> createMap, Map<String, String> defMap, String creates) {
        List<String> createStatements = new ArrayList<String>();
        for (Map.Entry<String, String> create : createMap.entrySet()) {
            createStatements.add("create " + create.getKey() + " = " + create.getValue());
        }
        List<String> defStatements = new ArrayList<String>();
        for (Map.Entry<String, String> def : defMap.entrySet()) {
            defStatements.add("def " + def.getKey() + " = " + def.getValue());
        }
        String[] trailing = CommandParseUtilities.quoteSafeSplitAndTrim(creates, ';');

        ArrayList<String> statements = new ArrayList<String>(defStatements);
        statements.addAll(createStatements);
        statements.addAll(Arrays.asList(trailing));
        return statements;
    }

    /**
     * Convenience overload of
     * {@link #generateTempViewName(String, boolean, String, String, int, int, List)} that
     * contributes no timestamps to the generated name.
     *
     * @param fileName   file name or query text the view is built from
     * @param usegorpipe flag folded into the name
     * @param filter     optional filter text, may be {@code null}
     * @param chr        optional chromosome, may be {@code null}; {@code pos}/{@code end} are only used when it is set
     * @param pos        range start folded into the name
     * @param end        range end folded into the name
     * @return the generated view name
     */
    public static String generateTempViewName(String fileName, boolean usegorpipe, String filter, String chr, int pos, int end) {
        List<Instant> noTimestamps = Collections.emptyList();
        return SparkRowUtilities.generateTempViewName(fileName, usegorpipe, filter, chr, pos, end, noTimestamps);
    }

    /**
     * Derives a temp-view name from the inputs that identify a registered source.
     *
     * <p>The key is built as {@code [chr + pos + end] + [filter] + usegorpipe + fileName} followed
     * by the string form of every instant in {@code inst}; bracketed parts are only present when
     * {@code chr} / {@code filter} are non-null. The name is {@code "g"} followed by the absolute
     * value of that key's {@link String#hashCode()}.
     *
     * <p>The hash is widened to {@code long} before taking the absolute value, because
     * {@code Math.abs(Integer.MIN_VALUE)} is still negative and would yield a name containing
     * {@code '-'}. All other hash values produce exactly the same name as before.
     *
     * @param fileName   file name or query text the view is built from
     * @param usegorpipe flag folded into the name
     * @param filter     optional filter text, may be {@code null}
     * @param chr        optional chromosome, may be {@code null}
     * @param pos        range start, only used when {@code chr} is non-null
     * @param end        range end, only used when {@code chr} is non-null
     * @param inst       timestamps folded into the name; must not be {@code null}
     * @return a name of the form {@code g<non-negative number>}
     */
    public static String generateTempViewName(String fileName, boolean usegorpipe, String filter, String chr, int pos, int end, List<Instant> inst) {
        String key = usegorpipe + fileName;
        if (filter != null) {
            key = filter + key;
        }
        if (chr != null) {
            key = chr + pos + end + key;
        }
        String timestamps = inst.stream().map(Instant::toString).collect(Collectors.joining());
        int hash = (key + timestamps).hashCode();
        // Widen first: Math.abs(int) cannot represent |Integer.MIN_VALUE|.
        return "g" + Math.abs((long) hash);
    }

    /**
     * Builds a Spark schema from a tab-separated header and a row of single-letter type codes.
     *
     * <p>One nullable field is produced per column of {@code types}: code {@code "S"} maps to
     * string, {@code "D"} to double and anything else to integer. Field names are taken from the
     * header at the same index.
     *
     * @param header tab-separated column names; must have at least {@code types.numCols()} entries
     * @param types  row whose column values are the type codes
     * @return the resulting schema
     */
    public static StructType gor2Schema(String header, org.gorpipe.gor.model.Row types) {
        String[] columnNames = header.split("\t");
        int columnCount = types.numCols();
        StructField[] schemaFields = new StructField[columnCount];
        for (int col = 0; col < columnCount; col++) {
            String code = types.stringValue(col);
            DataType sparkType;
            if (code.equals("S")) {
                sparkType = DataTypes.StringType;
            } else if (code.equals("D")) {
                sparkType = DataTypes.DoubleType;
            } else {
                sparkType = DataTypes.IntegerType;
            }
            schemaFields[col] = new StructField(columnNames[col], sparkType, true, Metadata.empty());
        }
        return new StructType(schemaFields);
    }

    /**
     * Converts a {@link GorDataType} into a Spark schema with one nullable field per header column.
     *
     * <p>Unless {@code gorDataType.nor} is set, the first two columns are fixed to string and
     * integer. Every remaining column takes its type from {@code gorDataType.dataTypeMap}, keyed
     * by column index, and falls back to string when the map has no entry.
     *
     * @param gorDataType header, type overrides and nor flag describing the source
     * @return the resulting schema
     */
    public static StructType gorDataTypeToStructType(GorDataType gorDataType) {
        String[] columnNames = gorDataType.header;
        Map<Integer, DataType> typeOverrides = gorDataType.dataTypeMap;
        DataType[] columnTypes = new DataType[columnNames.length];

        int firstFreeColumn = 0;
        if (!gorDataType.nor) {
            // The two leading columns are typed unconditionally for non-nor data.
            columnTypes[0] = DataTypes.StringType;
            columnTypes[1] = DataTypes.IntegerType;
            firstFreeColumn = 2;
        }
        for (int col = firstFreeColumn; col < columnTypes.length; col++) {
            columnTypes[col] = typeOverrides.getOrDefault(col, DataTypes.StringType);
        }

        StructField[] schemaFields = new StructField[columnNames.length];
        for (int col = 0; col < schemaFields.length; col++) {
            schemaFields[col] = new StructField(columnNames[col], columnTypes[col], true, Metadata.empty());
        }
        return new StructType(schemaFields);
    }

    /**
     * Infers column types from a stream and returns them as a Spark schema.
     *
     * <p>Delegates to {@code inferDataTypes} and {@link #gorDataTypeToStructType(GorDataType)}.
     * Note that {@code inferDataTypes} takes the flags in the order {@code (isGorz, nor)}, the
     * reverse of this method's parameter order.
     *
     * @param fileStream stream to sample
     * @param fileName   name of the file the stream belongs to
     * @param nor        whether the data is treated as nor
     * @param isGorz     whether the data is gorz
     * @return the inferred schema
     * @throws IOException         propagated from type inference
     * @throws DataFormatException propagated from type inference
     */
    public static StructType inferSchema(InputStream fileStream, String fileName, boolean nor, boolean isGorz) throws IOException, DataFormatException {
        return SparkRowUtilities.gorDataTypeToStructType(SparkRowUtilities.inferDataTypes(fileStream, fileName, isGorz, nor));
    }

    /**
     * Resolves a file reference to the location that should be read and captures its
     * last-modified time.
     *
     * <ul>
     *   <li>A reference containing {@code "://"} is returned unchanged with a {@code null}
     *       timestamp list.</li>
     *   <li>An absolute path is used as is.</li>
     *   <li>A relative path is resolved against {@code standalone} when that is non-empty (only
     *       the text before its first space is used as the base directory).</li>
     *   <li>Otherwise a relative path that does not exist as given is resolved against
     *       {@code fileroot}, normalised and made absolute.</li>
     * </ul>
     *
     * <p>A {@code null} {@code fileroot} is tolerated: the relative path is then kept unchanged
     * instead of failing with a {@link NullPointerException}.
     *
     * @param fn         file name, path or URL-style reference
     * @param fileroot   project root used for relative paths, may be {@code null}
     * @param standalone optional base-directory specification, may be {@code null} or empty
     * @return the resolved path together with its timestamp list (empty when the modification
     *         time cannot be read)
     */
    public static RowDataType translatePath(String fn, Path fileroot, String standalone) {
        if (fn.contains("://")) {
            return new RowDataType(fn, null);
        }

        Path filePath = Paths.get(fn);
        if (!filePath.isAbsolute()) {
            if (standalone != null && standalone.length() > 0) {
                int spaceIndex = standalone.indexOf(' ');
                String baseDir = spaceIndex == -1 ? standalone : standalone.substring(0, spaceIndex);
                filePath = Paths.get(baseDir).resolve(fn);
            } else if (fileroot != null && !Files.exists(filePath)) {
                filePath = fileroot.resolve(filePath).normalize().toAbsolutePath();
            }
        }

        List<Instant> inst;
        try {
            inst = Collections.singletonList(Files.getLastModifiedTime(filePath).toInstant());
        } catch (IOException ignored) {
            // Best effort: a missing or unreadable file simply contributes no timestamp.
            inst = Collections.emptyList();
        }
        return new RowDataType(filePath.toString(), inst);
    }

    /**
     * Runs a GOR command and infers the column types of its output.
     *
     * <p>The command is opened as a {@code DynamicRowSource}; its header is split on tabs and its
     * rows, converted with {@code toString()}, are handed to {@code typeFromStream}. The files the
     * row source reports as used are recorded on the result.
     *
     * <p>The row source is closed in a {@code finally} block, so it is released even when reading
     * the header or inferring the types fails.
     *
     * @param gorcmd          the GOR command to run
     * @param gorSparkSession session providing the GOR context
     * @param nor             whether the output is treated as nor
     * @return the inferred types together with the used files
     */
    public static GorDataType gorCmdSchema(String gorcmd, GorSparkSession gorSparkSession, boolean nor) {
        DynIterator.DynamicRowSource drs = new DynIterator.DynamicRowSource(gorcmd, gorSparkSession.getGorContext(), false);
        try {
            String[] ha = drs.getHeader().split("\t");
            // 16 == Spliterator.ORDERED
            Stream<String> linestream = StreamSupport.stream(Spliterators.spliteratorUnknownSize(drs, 16), false).map(Object::toString);
            GorDataType gdt = SparkRowUtilities.typeFromStream(linestream, false, ha, nor);
            gdt.setUsedFiles(JavaConverters.seqAsJavaList((Seq)drs.usedFiles()));
            return gdt;
        } finally {
            // Previously the source was only closed via a stream close handler that this method never triggered.
            drs.close();
        }
    }

    /**
     * Runs several GOR commands and infers column types from their concatenated output.
     *
     * <p>The header and the used files are taken from the first command only. Whether the data is
     * treated as nor is decided from the first command: it is nor when the lower-cased command
     * starts with {@code "nor "} or {@code "norrows "}. The rows of all commands, converted with
     * {@code toString()}, are concatenated in order and handed to {@code typeFromStream}.
     *
     * <p>Resource handling: the header row source is closed in a {@code finally} block, and the
     * concatenated stream is closed after inference so that the close handler of every
     * per-command row source runs.
     *
     * @param gorcmds         the GOR commands; must contain at least one element
     * @param gorSparkSession session providing the GOR context
     * @return the inferred types together with the files used by the first command
     */
    public static GorDataType gorCmdSchema(String[] gorcmds, GorSparkSession gorSparkSession) {
        String query = gorcmds[0];
        String lowerQuery = query.toLowerCase();
        boolean nor = lowerQuery.startsWith("nor ") || lowerQuery.startsWith("norrows ");

        String header;
        List<String> usedFiles;
        DynIterator.DynamicRowSource headerSource = new DynIterator.DynamicRowSource(query, gorSparkSession.getGorContext(), false);
        try {
            header = headerSource.getHeader();
            usedFiles = JavaConverters.seqAsJavaList((Seq<String>)headerSource.usedFiles());
        } finally {
            headerSource.close();
        }
        String[] ha = header.split("\t");

        // One lazily-read stream per command; each closes its row source when the stream is closed.
        Stream<Stream<String>> perCommand = Arrays.stream(gorcmds).map(cmd -> {
            DynIterator.DynamicRowSource source = new DynIterator.DynamicRowSource(cmd, gorSparkSession.getGorContext(), false);
            // 16 == Spliterator.ORDERED
            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(source, 16), false).map(Object::toString).onClose(() -> source.close());
        });
        // Closing the concatenated stream runs the close handlers of all of its inputs.
        try (Stream<String> linestream = perCommand.reduce(Stream::concat).orElseGet(Stream::empty)) {
            GorDataType gdt = SparkRowUtilities.typeFromStream(linestream, false, ha, nor);
            gdt.setUsedFiles(usedFiles);
            return gdt;
        }
    }

    public static Dataset<? extends Row> registerFile(String[] fns, String name, String profile, GorSparkSession gorSparkSession, String standalone, Path fileroot, Path cacheDir, boolean usestreaming, String filter, String filterFile, String filterColumn, String splitFile, boolean nor, String chr, int pos, int end, String jobid, String cacheFile, boolean cpp, boolean tag, StructType schema) throws IOException, DataFormatException {
        Map<Object, Object> dataTypeMap;
        DataType[] dataTypes;
        Dataset gor;
        String tempViewName;
        List<Instant> inst;
        String fileName;
        String fn = fns[0];
        boolean curlyQuery = fn.startsWith("{");
        boolean nestedQuery = fn.startsWith("<(") || curlyQuery;
        Path filePath = null;
        if (nestedQuery) {
            fileName = fn.substring(curlyQuery ? 1 : 2, fn.length() - 1);
            Predicate<String> gorpred = SparkRowUtilities.getFileEndingPredicate();
            java.util.function.Function<String, Stream> gorfileflat = p -> p.startsWith("(") ? Arrays.stream(CommandParseUtilities.quoteCurlyBracketsSafeSplit((String)p.substring(1, p.length() - 1), (char)' ')).filter(gorpred) : Stream.of(p);
            String[] cmdsplit = CommandParseUtilities.quoteCurlyBracketsSafeSplit((String)fileName, (char)' ');
            inst = Arrays.stream(cmdsplit).flatMap(gorfileflat).filter(gorpred).map(x$0 -> Paths.get(x$0, new String[0])).map(p -> p.isAbsolute() ? p : fileroot.resolve((Path)p)).map(p -> {
                try {
                    return Files.getLastModifiedTime(p, new LinkOption[0]).toInstant();
                }
                catch (IOException e) {
                    return null;
                }
            }).filter(Objects::nonNull).collect(Collectors.toList());
            tempViewName = SparkRowUtilities.generateTempViewName(fileName, usestreaming, filter, chr, pos, end, inst);
        } else {
            RowDataType rdt = SparkRowUtilities.translatePath(fn, fileroot, standalone);
            fileName = rdt.path;
            inst = rdt.getTimestamp();
            tempViewName = SparkRowUtilities.generateTempViewName(fileName, usestreaming, filter, chr, pos, end, inst);
        }
        String[] tableNames = gorSparkSession.getSparkSession().sqlContext().tableNames();
        if (gorSparkSession.datasetMap().containsKey(tempViewName) && Arrays.asList(tableNames).contains(tempViewName)) {
            RowDataType rdt = gorSparkSession.datasetMap().get(tempViewName);
            gor = rdt.dataset;
            dataTypes = rdt.datatypes;
            dataTypeMap = new HashMap();
            IntStream.range(0, dataTypes.length).forEach(i -> {
                DataType dt = dataTypes[i];
                if (dt != DataTypes.StringType) {
                    dataTypeMap.put(i, dt);
                }
            });
            if (name != null) {
                gor.createOrReplaceTempView(name);
            }
        } else {
            nestedQuery = false;
            if (nestedQuery) {
                Map<Path, String> fNames;
                String[] headerArray;
                boolean hasFilter = filter != null && filter.length() > 0;
                Object gorcmd = fileName;
                if (hasFilter) {
                    gorcmd = ((String)gorcmd).substring(0, 4) + "-f" + filter + ((String)gorcmd).substring(3);
                }
                if (chr != null) {
                    String rest = ((String)gorcmd).substring(3);
                    gorcmd = ((String)gorcmd).substring(0, 4) + "-p" + chr + ":" + pos + "-";
                    if (end != -1) {
                        gorcmd = (String)gorcmd + end;
                    }
                    gorcmd = (String)gorcmd + rest;
                }
                GorDataType gdt = SparkRowUtilities.gorCmdSchema((String)gorcmd, gorSparkSession, nor);
                boolean isGord = false;
                List<String> usedFiles = gdt.usedFiles;
                if (usedFiles.size() > 0) {
                    fileName = usedFiles.get(0);
                    if (!fileName.contains("://")) {
                        Path path = filePath = standalone != null && standalone.length() > 0 ? Paths.get(standalone, new String[0]).resolve(fileName) : Paths.get(fileName, new String[0]);
                    }
                    if ((isGord = fileName.toLowerCase().endsWith(".gord")) && !hasFilter) {
                        headerArray = Arrays.copyOf(gdt.header, gdt.header.length + 1);
                        headerArray[headerArray.length - 1] = "PN";
                    } else {
                        headerArray = gdt.header;
                    }
                } else {
                    headerArray = gdt.header;
                }
                dataTypes = new DataType[headerArray.length];
                int start = 0;
                if (!nor) {
                    dataTypes[0] = DataTypes.StringType;
                    dataTypes[1] = DataTypes.IntegerType;
                    start = 2;
                }
                for (int i2 = start; i2 < dataTypes.length; ++i2) {
                    dataTypes[i2] = gdt.dataTypeMap.getOrDefault(i2, DataTypes.StringType);
                }
                StructField[] fields = (StructField[])IntStream.range(0, headerArray.length).mapToObj(i -> new StructField(headerArray[i], dataTypes[i], true, Metadata.empty())).toArray(StructField[]::new);
                schema = new StructType(fields);
                ExpressionEncoder encoder = RowEncoder.apply((StructType)schema);
                if (isGord) {
                    assert (filePath != null);
                    Path fileParent = filePath.getParent();
                    fNames = Files.lines(filePath).map(l -> l.split("\t")).peek(l -> {
                        l[0] = l[0].split("\\|")[0];
                    }).collect(Collectors.toMap(s -> fileParent.resolve(s[0]), s -> s[1]));
                    HashMap<String, String> uNames = new HashMap<String, String>();
                    for (Path p2 : fNames.keySet()) {
                        uNames.put(p2.toUri().toString(), fNames.get(p2));
                    }
                } else {
                    fNames = null;
                }
                JavaRDD rdd = new RowGorRDD(gorSparkSession.getSparkSession(), (String)gorcmd, "", !hasFilter && fNames != null ? String.join((CharSequence)",", fNames.values()) : null, chr, pos, end, true).toJavaRDD();
                Function & Serializable rfunc = (Function & Serializable)a -> {
                    Object[] o = new Object[a.numCols()];
                    o[0] = a.chr;
                    o[1] = a.pos;
                    for (int i = 2; i < o.length; ++i) {
                        o[i] = fields[i].dataType().sameType(DataTypes.IntegerType) ? Integer.valueOf(a.colAsInt(i)) : (fields[i].dataType().sameType(DataTypes.DoubleType) ? Double.valueOf(a.colAsDouble(i)) : a.colAsString(i).toString());
                    }
                    return RowFactory.create((Object[])o);
                };
                JavaRDD nrdd = rdd.map((Function)rfunc);
                gor = gorSparkSession.getSparkSession().createDataset(nrdd.rdd(), (Encoder)encoder);
            } else {
                DataFrameReader dfr;
                HashMap<String, String> uNames;
                Map<Path, String> fNames;
                boolean isGord = fileName.toLowerCase().endsWith(".gord");
                Path dictFile = null;
                int dictSplit = 0;
                if (isGord && filePath != null) {
                    Path fileParent = filePath.toAbsolutePath().normalize().getParent();
                    dictSplit = Files.lines(filePath).mapToInt(l -> l.split("\t").length).findFirst().getAsInt();
                    dictFile = filePath;
                    fNames = Files.lines(filePath).map(l -> l.split("\t")).peek(l -> {
                        l[0] = l[0].split("\\|")[0];
                    }).collect(Collectors.toMap(s -> fileParent.resolve(s[0]), s -> s[1], (a1, a2) -> a1));
                    fileName = fNames.keySet().iterator().next().toString();
                    filePath = standalone != null && standalone.length() > 0 ? Paths.get(standalone, new String[0]).resolve(fileName) : Paths.get(fileName, new String[0]);
                    uNames = new HashMap();
                    for (Path p3 : fNames.keySet()) {
                        uNames.put(p3.toUri().toString(), fNames.get(p3));
                    }
                } else if (fns.length > 1) {
                    fNames = Arrays.stream(fns).collect(Collectors.toMap(x$0 -> Paths.get(x$0, new String[0]), s -> s));
                    fileName = fNames.keySet().iterator().next().toString();
                    filePath = standalone != null && standalone.length() > 0 ? Paths.get(standalone, new String[0]).resolve(fileName) : Paths.get(fileName, new String[0]);
                    uNames = new HashMap<String, String>();
                    for (Path p4 : fNames.keySet()) {
                        uNames.put(p4.toUri().toString(), fNames.get(p4));
                    }
                } else {
                    fNames = null;
                    uNames = null;
                }
                if (fileName.startsWith("spark ")) {
                    PipeInstance pi = new PipeInstance(gorSparkSession.getGorContext());
                    PipeOptions po = new PipeOptions();
                    po.query_$eq(fileName);
                    pi.subProcessArguments(po);
                    SparkRowSource sparkRowSource = (SparkRowSource)pi.theInputSource();
                    gor = sparkRowSource.getDataset();
                    dataTypes = (DataType[])Arrays.stream(gor.schema().fields()).map(StructField::dataType).toArray(DataType[]::new);
                } else if (fileName.startsWith("pgor ") || fileName.startsWith("partgor ") || fileName.startsWith("parallel ") || fileName.startsWith("gor ") || fileName.startsWith("nor ") || fileName.startsWith("gorrows ") || fileName.startsWith("norrows ")) {
                    String securityContext;
                    dfr = gorSparkSession.getSparkSession().read().format(gordatasourceClassname);
                    dfr.option("query", fileName);
                    if (tag) {
                        dfr.option("tag", true);
                    }
                    if (fileroot != null) {
                        dfr.option("projectroot", fileroot.toString());
                    }
                    if (cacheDir != null) {
                        dfr.option("cachedir", cacheDir.toString());
                    }
                    if ((securityContext = gorSparkSession.getProjectContext().getFileReader().getSecurityContext()) != null) {
                        dfr.option("securityContext", securityContext);
                    }
                    dfr.option("aliasfile", gorSparkSession.getProjectContext().getGorAliasFile());
                    dfr.option("configfile", gorSparkSession.getProjectContext().getGorConfigFile());
                    if (schema != null) {
                        dfr.schema(schema);
                    }
                    gor = dfr.load();
                    dataTypes = (DataType[])Arrays.stream(gor.schema().fields()).map(StructField::dataType).toArray(DataType[]::new);
                } else if (fileName.toLowerCase().endsWith(".json")) {
                    dfr = gorSparkSession.getSparkSession().read().format("json");
                    if (schema != null) {
                        dfr = dfr.schema(schema);
                    }
                    gor = dfr.load(fileName);
                    dataTypes = (DataType[])Arrays.stream(gor.schema().fields()).map(StructField::dataType).toArray(DataType[]::new);
                } else if (fileName.toLowerCase().endsWith(".parquet")) {
                    gor = gorSparkSession.getSparkSession().read().format("org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2").load(fileName);
                    dataTypes = (DataType[])Arrays.stream(gor.schema().fields()).map(StructField::dataType).toArray(DataType[]::new);
                } else if (fileName.toLowerCase().endsWith(".vcf") || fileName.toLowerCase().endsWith(".vcf.gz") || fileName.toLowerCase().endsWith(".vcf.bgz")) {
                    String vcfDataSource = "io.projectglow.vcf.VCFFileFormat";
                    gor = gorSparkSession.getSparkSession().read().format(vcfDataSource).load(fileName);
                    dataTypes = (DataType[])Arrays.stream(gor.schema().fields()).map(StructField::dataType).toArray(DataType[]::new);
                } else if (fileName.toLowerCase().endsWith(".bgen")) {
                    String bgenDataSource = "io.projectglow.bgen.BgenFileFormat";
                    gor = gorSparkSession.getSparkSession().read().format(bgenDataSource).load(fileName);
                    dataTypes = (DataType[])Arrays.stream(gor.schema().fields()).map(StructField::dataType).toArray(DataType[]::new);
                } else if (splitFile == null && (fileName.toLowerCase().endsWith(".gor") || fileName.toLowerCase().endsWith(".nor") || fileName.toLowerCase().endsWith(".tsv") || fileName.toLowerCase().endsWith(".csv"))) {
                    String firstCol;
                    dfr = gorSparkSession.getSparkSession().read().format(csvDataSource).option("header", true);
                    dfr = schema == null ? dfr.option("inferSchema", true) : dfr.schema(schema);
                    if (!fileName.toLowerCase().endsWith(".csv")) {
                        dfr = dfr.option("delimiter", "\t");
                    }
                    if ((firstCol = (gor = dfr.load(fileName)).columns()[0]).startsWith("#")) {
                        gor = gor.withColumnRenamed(firstCol, firstCol.substring(1));
                    }
                    dataTypes = (DataType[])Arrays.stream(gor.schema().fields()).map(StructField::dataType).toArray(DataType[]::new);
                } else {
                    DataFrameReader dfr2;
                    Collection<Object> pns;
                    boolean isGorz = fileName.toLowerCase().endsWith(".gorz");
                    boolean isGorgz = fileName.toLowerCase().endsWith(".gor.gz") || fileName.toLowerCase().endsWith(".gor.bgz");
                    GorDataType gorDataType = SparkRowUtilities.inferDataTypes(gorSparkSession.getProjectContext().getFileReader(), fileName, isGorz, nor);
                    String[] headerArray = gorDataType.header;
                    dataTypeMap = gorDataType.dataTypeMap;
                    dataTypes = new DataType[headerArray.length];
                    int start = 0;
                    if (!nor && dataTypes.length > 1 && headerArray[0].equalsIgnoreCase("chrom")) {
                        dataTypes[0] = DataTypes.StringType;
                        dataTypes[1] = DataTypes.IntegerType;
                        start = 2;
                    }
                    for (int i3 = start; i3 < dataTypes.length; ++i3) {
                        dataTypes[i3] = dataTypeMap.getOrDefault(i3, DataTypes.StringType);
                    }
                    Collection<String> collection = filter != null && filter.length() > 0 ? new HashSet<String>(Arrays.asList(filter.split(","))) : (pns = fNames != null ? fNames.values() : Collections.emptySet());
                    if (isGorz && !gorDataType.base128 || dictFile != null) {
                        if (schema == null) {
                            StructField[] tmpfields;
                            StructField[] fields;
                            if (dictFile != null) {
                                Stream<StructField> baseStream = IntStream.range(0, headerArray.length).mapToObj(i -> new StructField(headerArray[i], dataTypes[i], true, Metadata.empty()));
                                Stream.Builder<StructField> sb = Stream.builder();
                                if (dictSplit == 2 && filterColumn != null && filterColumn.length() > 0) {
                                    sb.add(new StructField(filterColumn, DataTypes.StringType, true, Metadata.empty()));
                                }
                                if (splitFile != null && splitFile.length() > 0) {
                                    sb.add(new StructField("tag", DataTypes.StringType, true, Metadata.empty()));
                                }
                                Stream extra = sb.build();
                                fields = (StructField[])Stream.concat(baseStream, extra).toArray(StructField[]::new);
                            } else if (gorDataType.withStart) {
                                tmpfields = new StructField[]{new StructField("Chrom", DataTypes.StringType, true, Metadata.empty()), new StructField("Start", DataTypes.IntegerType, true, Metadata.empty()), new StructField("Stop", DataTypes.IntegerType, true, Metadata.empty()), new StructField("data", DataTypes.StringType, true, Metadata.empty())};
                                fields = tmpfields;
                            } else {
                                tmpfields = new StructField[]{new StructField("Chrom", DataTypes.StringType, true, Metadata.empty()), new StructField("Pos", DataTypes.IntegerType, true, Metadata.empty()), new StructField("data", DataTypes.StringType, true, Metadata.empty())};
                                fields = tmpfields;
                            }
                            schema = new StructType(fields);
                        }
                        if (uNames != null) {
                            if (dictFile != null) {
                                dfr2 = gorSparkSession.getSparkSession().read().format(gordatasourceClassname);
                                if (fileroot != null) {
                                    dfr2.option("projectroot", fileroot.toString());
                                }
                                dfr2.option("aliasfile", gorSparkSession.getProjectContext().getGorAliasFile());
                                dfr2.option("configfile", gorSparkSession.getProjectContext().getGorConfigFile());
                                String securityContext = gorSparkSession.getProjectContext().getFileReader().getSecurityContext();
                                if (securityContext != null) {
                                    dfr2 = dfr2.option("securityContext", securityContext);
                                }
                                if (filter != null) {
                                    dfr2 = dfr2.option("f", filter);
                                }
                                if (filterFile != null) {
                                    dfr2 = dfr2.option("ff", filterFile);
                                }
                                if (splitFile != null) {
                                    dfr2 = dfr2.option("split", splitFile);
                                }
                                if (filterColumn != null) {
                                    dfr2 = dfr2.option("s", filterColumn);
                                }
                                if (chr != null) {
                                    Object seek = chr;
                                    if (pos > 0 || end != -1) {
                                        seek = (String)seek + ":" + pos;
                                        if (end != -1) {
                                            seek = (String)seek + "-" + end;
                                        }
                                    }
                                    dfr2 = dfr2.option("p", (String)seek);
                                }
                                gor = dfr2.schema(schema).load(dictFile.toAbsolutePath().normalize().toString());
                                isGorz = false;
                            } else {
                                gor = gorSparkSession.getSparkSession().read().format(csvDataSource).option("header", "true").option("delimiter", "\t").schema(schema).load((String[])fNames.entrySet().stream().filter(e -> pns.contains(e.getValue())).map(Map.Entry::getKey).map(Path::toString).toArray(String[]::new));
                            }
                        } else {
                            gor = gorSparkSession.getSparkSession().read().format(csvDataSource).option("header", "true").option("delimiter", "\t").schema(schema).load(fileName);
                        }
                    } else {
                        if (schema == null) {
                            StructField[] fields = (StructField[])IntStream.range(0, headerArray.length).mapToObj(i -> new StructField(headerArray[i], dataTypes[i], true, Metadata.empty())).toArray(StructField[]::new);
                            schema = new StructType(fields);
                        }
                        if (uNames != null && !gorDataType.base128) {
                            gor = gorSparkSession.getSparkSession().read().format(csvDataSource).option("header", "true").option("delimiter", "\t").schema(schema).load((String[])fNames.entrySet().stream().filter(e -> pns.contains(e.getValue())).map(Map.Entry::getKey).map(Path::toString).toArray(String[]::new));
                            if (filter != null && filter.length() > 0) {
                                gor = gor.selectExpr(new String[]{"*", "get_pn(input_file_name()) as PN"});
                            }
                        } else if (isGorgz || gorDataType.base128 || splitFile != null) {
                            dfr2 = gorSparkSession.getSparkSession().read().format(gordatasourceClassname).schema(schema);
                            if (gorSparkSession.getRedisUri() != null && gorSparkSession.getRedisUri().length() > 0) {
                                dfr2 = dfr2.option("redis", gorSparkSession.getRedisUri()).option("jobid", jobid).option("cachefile", cacheFile).option("native", Boolean.toString(cpp));
                            }
                            if (splitFile != null) {
                                dfr2 = dfr2.option("split", splitFile);
                            }
                            if (chr != null) {
                                Object seek = chr;
                                if (pos > 0 || end != -1) {
                                    seek = (String)seek + ":" + pos;
                                    if (end != -1) {
                                        seek = (String)seek + "-" + end;
                                    }
                                }
                                dfr2 = dfr2.option("p", (String)seek);
                            }
                            gor = dfr2.load(fileName);
                        } else {
                            Dataset sgor = gorSparkSession.getSparkSession().read().format(csvDataSource).option("header", "true").option("delimiter", "\t").schema(schema).load(fileName);
                            if (filter != null && filter.length() > 0) {
                                OptionalInt oi;
                                int filterColumnIndex = headerArray.length - 1;
                                if (filterColumn != null && (oi = IntStream.range(0, headerArray.length).filter(i -> headerArray[i].equals(filterColumn)).findFirst()).isPresent()) {
                                    filterColumnIndex = oi.getAsInt();
                                }
                                PNFilterFunction ff = new PNFilterFunction(filter, filterColumnIndex);
                                sgor = sgor.filter((FilterFunction)ff);
                            }
                            gor = sgor;
                        }
                    }
                    if (!isGorgz && !gorDataType.base128) {
                        if (isGorz) {
                            if (chr != null) {
                                gor = gorDataType.withStart && end != -1 ? gor.filter((FilterFunction & Serializable)row -> chr.equals(row.getString(0)) && row.getInt(1) <= end && row.getInt(2) >= pos) : gor.filter((FilterFunction & Serializable)row -> chr.equals(row.getString(0)) && row.getInt(1) >= pos);
                            }
                            StructField[] flds = (StructField[])IntStream.range(0, headerArray.length).mapToObj(i -> new StructField(headerArray[i], dataTypes[i], true, Metadata.empty())).toArray(StructField[]::new);
                            schema = new StructType(flds);
                            ExpressionEncoder encoder = RowEncoder.apply((StructType)schema);
                            boolean withStart = gorDataType.withStart;
                            gor = gor.flatMap((FlatMapFunction & Serializable)row -> {
                                byte[] bb;
                                String zip = withStart ? row.getString(3) : row.getString(2);
                                char tp = zip.charAt(0);
                                CompressionType compressionLibrary = (tp & 2) == 0 ? CompressionType.ZLIB : CompressionType.ZSTD;
                                String zipo = zip.substring(1);
                                try {
                                    bb = Base64.getDecoder().decode(zipo);
                                }
                                catch (Exception e) {
                                    bb = ByteArray.to8Bit((byte[])zipo.getBytes());
                                }
                                Unzipper unzip = new Unzipper();
                                unzip.setType(compressionLibrary);
                                unzip.setRawInput(bb, 0, bb.length);
                                int unzipLen = unzip.decompress(unzipBuffer, 0, unzipBuffer.length);
                                ByteArrayInputStream bais = new ByteArrayInputStream(unzipBuffer, 0, unzipLen);
                                InputStreamReader isr = new InputStreamReader(bais);
                                BufferedReader br = new BufferedReader(isr);
                                return (nor ? br.lines().map(line -> {
                                    String[] split = line.split("\t");
                                    Object[] objs = new Object[split.length];
                                    for (int i = 0; i < split.length; ++i) {
                                        if (dataTypeMap.containsKey(i)) {
                                            if (dataTypeMap.get(i) == DataTypes.IntegerType) {
                                                objs[i] = Integer.parseInt(split[i]);
                                                continue;
                                            }
                                            objs[i] = Double.parseDouble(split[i]);
                                            continue;
                                        }
                                        objs[i] = split[i];
                                    }
                                    return RowFactory.create((Object[])objs);
                                }) : br.lines().map(line -> {
                                    String[] split = line.split("\t");
                                    Object[] objs = new Object[split.length];
                                    objs[0] = split[0];
                                    objs[1] = Integer.parseInt(split[1]);
                                    for (int i = 2; i < split.length; ++i) {
                                        if (dataTypeMap.containsKey(i)) {
                                            if (dataTypeMap.get(i) == DataTypes.IntegerType) {
                                                objs[i] = Integer.parseInt(split[i]);
                                                continue;
                                            }
                                            objs[i] = Double.parseDouble(split[i]);
                                            continue;
                                        }
                                        objs[i] = split[i];
                                    }
                                    return RowFactory.create((Object[])objs);
                                })).iterator();
                            }, (Encoder)encoder);
                            if (chr != null) {
                                gor = gor.filter((FilterFunction & Serializable)row -> {
                                    int p = row.getInt(1);
                                    return chr.equals(row.getString(0)) && p >= pos && (end == -1 || p <= end);
                                });
                            }
                        } else if (chr != null) {
                            gor = end != -1 ? gor.filter((FilterFunction & Serializable)row -> chr.equals(row.getString(0)) && row.getInt(1) <= end && row.getInt(2) >= pos) : gor.filter((FilterFunction & Serializable)row -> chr.equals(row.getString(0)) && row.getInt(1) >= pos);
                        }
                    }
                }
            }
            if (name != null && !name.startsWith("#")) {
                gor.createOrReplaceTempView(name);
            }
            gor.createOrReplaceTempView(tempViewName);
            gorSparkSession.datasetMap().put(tempViewName, new RowDataType((Dataset<? extends Row>)gor, dataTypes, fileName, inst));
        }
        return gor;
    }

    /**
     * Opens {@code fileName} through the given reader and infers its column types.
     * <p>
     * The stream is closed before this method returns. That is safe because the
     * stream-based overload materialises its sample (at most 1000 lines) before it
     * builds the result, so nothing reads from the stream afterwards.
     *
     * @param fileReader reader used to open {@code fileName}
     * @param fileName   name of the file to sample; also used for compression detection by suffix
     * @param isGorz     whether the content is block-compressed (gorz) and must be unpacked first
     * @param nor        whether the two leading columns are dropped before inference
     * @return the inferred column types
     * @throws IOException         if the file cannot be opened, read or closed
     * @throws DataFormatException declared by the stream-based overload this method delegates to
     */
    public static GorDataType inferDataTypes(FileReader fileReader, String fileName, boolean isGorz, boolean nor) throws IOException, DataFormatException {
        // try-with-resources skips close() for a null resource, so a null stream still reaches the delegate.
        try (InputStream is = fileReader.getInputStream(fileName)) {
            return SparkRowUtilities.inferDataTypes(is, fileName, isGorz, nor);
        }
    }

    /**
     * Infers column types from the beginning of a tab-separated stream.
     * <p>
     * The first line is taken as the header (a leading {@code #} is stripped). For gorz
     * content only the first data line is read: its leading columns are skipped, the
     * block column is decoded (Base64, falling back to {@code ByteArray.to8Bit} and
     * flagging the result as base128 when the payload is not valid Base64), unpacked
     * with the library selected by the block's type byte, and the unpacked lines are
     * sampled. For any other content the remaining lines of the stream are sampled directly.
     * <p>
     * The stream is closed here only on the gorz path; otherwise closing is left to the caller.
     *
     * @param is       stream positioned at the start of the file; may be {@code null},
     *                 in which case an empty header and no sample lines are used
     * @param fileName file name; a {@code .gz} or {@code .bgz} suffix (case-insensitive)
     *                 makes the stream be read through a {@link GZIPInputStream}
     * @param isGorz   whether the data lines carry compressed blocks
     * @param nor      forwarded to {@code typeFromStream}
     * @return the inferred column types
     * @throws IOException         on read failure, or if the stream ends in the middle of the
     *                             leading columns of the first gorz line
     * @throws DataFormatException declared for failures while unpacking the block
     */
    public static GorDataType inferDataTypes(InputStream is, String fileName, boolean isGorz, boolean nor) throws IOException, DataFormatException {
        String fileLow = fileName.toLowerCase();
        boolean isCompressed = fileLow.endsWith(".gz") || fileLow.endsWith(".bgz");
        // Only wrap a real stream: GZIPInputStream rejects null before the null guard below could apply.
        if (isCompressed && is != null) {
            is = new GZIPInputStream(is);
        }
        Stream<String> linestream = Stream.empty();
        boolean withStart = false;
        String[] headerArray = new String[]{};
        boolean base128 = false;
        if (is != null) {
            // Read the header byte by byte so that nothing past the first newline is consumed.
            StringBuilder headerstr = new StringBuilder();
            int r = is.read();
            while (r != -1 && r != '\n') {
                headerstr.append((char)r);
                r = is.read();
            }
            String header = headerstr.toString();
            if (header.startsWith("#")) {
                header = header.substring(1);
            }
            headerArray = header.split("\t");
            if (isGorz) {
                r = is.read();
                if (r != -1) {
                    byte[] bb;
                    // Skip the first two columns of the first data line.
                    SparkRowUtilities.skipThroughTab(is, r, fileName);
                    SparkRowUtilities.skipThroughTab(is, is.read(), fileName);
                    r = is.read();
                    if (r >= '0' && r <= '9') {
                        // A digit here means a third leading column precedes the block column.
                        withStart = true;
                        SparkRowUtilities.skipThroughTab(is, r, fileName);
                        // Keep the type byte: it selects the compression library below.
                        r = is.read();
                    }
                    if (r == -1) {
                        throw new IOException("Unexpected end of stream before the compressed block in " + fileName);
                    }
                    CompressionType compressionLibrary = (r & 2) == 0 ? CompressionType.ZLIB : CompressionType.ZSTD;
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    r = is.read();
                    // Treat end of stream like end of line so a missing trailing newline cannot loop forever.
                    while (r != -1 && r != '\n') {
                        baos.write(r);
                        r = is.read();
                    }
                    is.close();
                    byte[] baosArray = baos.toByteArray();
                    try {
                        bb = Base64.getDecoder().decode(baosArray);
                    }
                    catch (IllegalArgumentException notBase64) {
                        // Not valid Base64: fall back to the 7-bit packed encoding.
                        base128 = true;
                        bb = ByteArray.to8Bit((byte[])baosArray);
                    }
                    Unzipper unzip = new Unzipper();
                    unzip.setType(compressionLibrary);
                    unzip.setRawInput(bb, 0, bb.length);
                    int unzipLen = unzip.decompress(unzipBuffer, 0, unzipBuffer.length);
                    ByteArrayInputStream bais = new ByteArrayInputStream(unzipBuffer, 0, unzipLen);
                    InputStreamReader isr = new InputStreamReader(bais);
                    linestream = new BufferedReader(isr).lines();
                }
            } else {
                linestream = new BufferedReader(new InputStreamReader(is)).lines();
            }
        }
        return SparkRowUtilities.typeFromStream(linestream, withStart, headerArray, nor, base128);
    }

    /**
     * Consumes bytes until a tab has been read, starting the comparison at {@code current}.
     *
     * @throws IOException if the stream ends before a tab is found
     */
    private static void skipThroughTab(InputStream is, int current, String fileName) throws IOException {
        while (current != '\t') {
            if (current == -1) {
                throw new IOException("Unexpected end of stream while skipping columns of the first gorz line in " + fileName);
            }
            current = is.read();
        }
    }

    /**
     * Builds a {@code GorDataType} from the primitive column types reported by a Parquet line.
     * <p>
     * Mapping per column: INT64 gives LongType / "L", INT32 gives IntegerType / "I",
     * FLOAT gives FloatType / "D", and every other primitive type gives StringType / "S".
     *
     * @param pl        line that supplies the column count and each column's primitive type
     * @param withStart passed through unchanged to the result
     * @param header    passed through unchanged to the result
     * @return the data type description, created with its base128 flag set to {@code false}
     */
    public static GorDataType typeFromParquetLine(ParquetLine pl, boolean withStart, String[] header) {
        HashMap<Integer, DataType> sparkTypes = new HashMap<Integer, DataType>();
        String[] typeCodes = new String[pl.numCols()];
        for (int col = 0; col < pl.numCols(); col++) {
            PrimitiveType.PrimitiveTypeName parquetType = pl.getType(col);
            DataType sparkType;
            String typeCode;
            if (parquetType == PrimitiveType.PrimitiveTypeName.INT64) {
                sparkType = DataTypes.LongType;
                typeCode = "L";
            } else if (parquetType == PrimitiveType.PrimitiveTypeName.INT32) {
                sparkType = DataTypes.IntegerType;
                typeCode = "I";
            } else if (parquetType == PrimitiveType.PrimitiveTypeName.FLOAT) {
                sparkType = DataTypes.FloatType;
                typeCode = "D";
            } else {
                // Anything not matched above is handled as text.
                sparkType = DataTypes.StringType;
                typeCode = "S";
            }
            sparkTypes.put(col, sparkType);
            typeCodes[col] = typeCode;
        }
        return new GorDataType(sparkTypes, withStart, header, typeCodes, false);
    }

    /**
     * Convenience overload of the five-argument {@code typeFromStream} that passes
     * {@code false} for the base128 flag.
     *
     * @param linestream  tab-separated data lines to sample
     * @param withStart   passed through unchanged
     * @param headerArray column names
     * @param nor         passed through unchanged
     * @return the inferred column types
     */
    public static GorDataType typeFromStream(Stream<String> linestream, boolean withStart, String[] headerArray, boolean nor) {
        final boolean base128 = false;
        return SparkRowUtilities.typeFromStream(linestream, withStart, headerArray, nor, base128);
    }

    /**
     * Infers a type for every header column by sampling up to 1000 tab-separated lines.
     * <p>
     * Every column starts as integer ("I"). A column is widened to double ("D") once a
     * sampled value fails {@link Integer#parseInt(String)} but parses with
     * {@link Double#parseDouble(String)}; a value longer than 16 characters without a
     * decimal point is not accepted as a double. A column whose value is not numeric
     * becomes a string ("S") and is removed from the type map. Once a column is a string
     * it stays a string, even if it was widened to double by an earlier row. A row with
     * fewer columns than the header turns the missing columns into strings. Sampling
     * stops early when no numeric candidates remain.
     * <p>
     * When {@code nor} is set, the first two columns are dropped from the header and from
     * each sampled row; headers or rows with fewer than two columns become empty.
     *
     * @param linestream  data lines to sample; consumed by this call
     * @param withStart   passed through unchanged to the result
     * @param headerArray column names, including the two leading columns when {@code nor} is set
     * @param nor         whether the two leading columns are dropped before inference
     * @param base128     passed through unchanged to the result
     * @return the inferred column types together with the (possibly shortened) header
     */
    public static GorDataType typeFromStream(Stream<String> linestream, boolean withStart, String[] headerArray, boolean nor, boolean base128) {
        if (nor) {
            // Math.min keeps a header with fewer than two columns from making copyOfRange throw.
            headerArray = Arrays.copyOfRange(headerArray, Math.min(2, headerArray.length), headerArray.length);
        }
        HashMap<Integer, DataType> dataTypeMap = new HashMap<Integer, DataType>();
        String[] gortypes = new String[headerArray.length];
        for (int i = 0; i < headerArray.length; ++i) {
            dataTypeMap.put(i, DataTypes.IntegerType);
            gortypes[i] = "I";
        }
        List<String[]> sample = linestream.limit(1000L)
                .map(line -> line.split("\t", -1))
                .map(cols -> nor ? Arrays.copyOfRange(cols, Math.min(2, cols.length), cols.length) : cols)
                .collect(Collectors.toList());
        for (String[] row : sample) {
            // Walk the remaining numeric candidates; the iterator allows in-place widening and removal.
            Iterator<Map.Entry<Integer, DataType>> candidates = dataTypeMap.entrySet().iterator();
            while (candidates.hasNext()) {
                Map.Entry<Integer, DataType> candidate = candidates.next();
                int idx = candidate.getKey();
                // A short row has no value for this column, which rules out a numeric type.
                String value = idx < row.length ? row[idx] : null;
                if (value != null && candidate.getValue() == DataTypes.IntegerType && SparkRowUtilities.parsesAsInteger(value)) {
                    continue;
                }
                if (value != null && SparkRowUtilities.parsesAsDouble(value)) {
                    candidate.setValue(DataTypes.DoubleType);
                    gortypes[idx] = "D";
                } else {
                    // Removing the entry is final: later rows no longer see this column as a candidate.
                    candidates.remove();
                    gortypes[idx] = "S";
                }
            }
            if (dataTypeMap.isEmpty()) {
                break;
            }
        }
        return new GorDataType(dataTypeMap, withStart, headerArray, gortypes, base128, nor);
    }

    /** Returns whether {@code value} is accepted by {@link Integer#parseInt(String)}. */
    private static boolean parsesAsInteger(String value) {
        try {
            Integer.parseInt(value);
            return true;
        }
        catch (NumberFormatException notAnInteger) {
            return false;
        }
    }

    /**
     * Returns whether {@code value} is accepted as a double: it must contain a decimal point
     * or be at most 16 characters long, and be accepted by {@link Double#parseDouble(String)}.
     */
    private static boolean parsesAsDouble(String value) {
        if (value.indexOf('.') < 0 && value.length() > 16) {
            return false;
        }
        try {
            Double.parseDouble(value);
            return true;
        }
        catch (NumberFormatException notADouble) {
            return false;
        }
    }
}

