Package gorsat.process
Class SparkRowUtilities
java.lang.Object
gorsat.process.SparkRowUtilities
-
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
static String
static String generateTempViewName(String fileName, boolean usegorpipe, String filter, String chr, int pos, int end)
static String generateTempViewName(String fileName, boolean usegorpipe, String filter, String chr, int pos, int end, List<Instant> inst)
static org.apache.spark.sql.types.StructType gor2Schema(String header, org.gorpipe.gor.model.Row types)
static GorDataType gorCmdSchema(String[] gorcmds, org.gorpipe.spark.GorSparkSession gorSparkSession)
static GorDataType gorCmdSchema(String gorcmd, org.gorpipe.spark.GorSparkSession gorSparkSession, boolean nor)
static org.apache.spark.sql.types.StructType gorDataTypeToStructType(GorDataType gorDataType)
static GorDataType inferDataTypes(InputStream is, String fileName, boolean isGorz, boolean nor)
static GorDataType inferDataTypes(org.gorpipe.gor.model.FileReader fileReader, String fileName, boolean isGorz, boolean nor)
static org.apache.spark.sql.types.StructType inferSchema(InputStream fileStream, String fileName, boolean nor, boolean isGorz)
static org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> registerFile(String[] fns, String name, String profile, org.gorpipe.spark.GorSparkSession gorSparkSession, String standalone, String cacheDir, boolean usestreaming, String filter, String filterFile, String filterColumn, String splitFile, boolean nor, String chr, int pos, int end, String jobid, String cacheFile, boolean cpp, boolean tag, org.apache.spark.sql.types.StructType schema, Map<String, String> readOptions)
static RowDataType translatePath(String fn, String standalone, org.gorpipe.gor.model.DriverBackedFileReader fileReader)
static GorDataType typeFromParquetLine(org.gorpipe.gor.model.ParquetLine pl, boolean withStart, String[] header)
static GorDataType typeFromStream(Stream<String> linestream, boolean withStart, String[] headerArray, boolean nor)
static GorDataType typeFromStream(Stream<String> linestream, boolean withStart, String[] headerArray, boolean nor, boolean base128)
-
Constructor Details
-
SparkRowUtilities
public SparkRowUtilities()
-
-
Method Details
-
getFileEndingPredicate
-
getFileEndingTimestampPredicate
-
createMapString
-
createMapList
-
generateTempViewName
-
generateTempViewName
-
gor2Schema
public static org.apache.spark.sql.types.StructType gor2Schema(String header, org.gorpipe.gor.model.Row types) -
gorDataTypeToStructType
public static org.apache.spark.sql.types.StructType gorDataTypeToStructType(GorDataType gorDataType) -
inferSchema
public static org.apache.spark.sql.types.StructType inferSchema(InputStream fileStream, String fileName, boolean nor, boolean isGorz) throws IOException, DataFormatException - Throws:
IOException
DataFormatException
-
translatePath
public static RowDataType translatePath(String fn, String standalone, org.gorpipe.gor.model.DriverBackedFileReader fileReader) throws IOException - Throws:
IOException
-
gorCmdSchema
public static GorDataType gorCmdSchema(String gorcmd, org.gorpipe.spark.GorSparkSession gorSparkSession, boolean nor) -
gorCmdSchema
public static GorDataType gorCmdSchema(String[] gorcmds, org.gorpipe.spark.GorSparkSession gorSparkSession) -
registerFile
public static org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> registerFile(String[] fns, String name, String profile, org.gorpipe.spark.GorSparkSession gorSparkSession, String standalone, String cacheDir, boolean usestreaming, String filter, String filterFile, String filterColumn, String splitFile, boolean nor, String chr, int pos, int end, String jobid, String cacheFile, boolean cpp, boolean tag, org.apache.spark.sql.types.StructType schema, Map<String, String> readOptions) throws IOException, DataFormatException - Throws:
IOException
DataFormatException
-
inferDataTypes
public static GorDataType inferDataTypes(org.gorpipe.gor.model.FileReader fileReader, String fileName, boolean isGorz, boolean nor) throws IOException, DataFormatException - Throws:
IOException
DataFormatException
-
inferDataTypes
public static GorDataType inferDataTypes(InputStream is, String fileName, boolean isGorz, boolean nor) throws IOException, DataFormatException - Throws:
IOException
DataFormatException
-
typeFromParquetLine
public static GorDataType typeFromParquetLine(org.gorpipe.gor.model.ParquetLine pl, boolean withStart, String[] header) -
typeFromStream
public static GorDataType typeFromStream(Stream<String> linestream, boolean withStart, String[] headerArray, boolean nor) -
typeFromStream
public static GorDataType typeFromStream(Stream<String> linestream, boolean withStart, String[] headerArray, boolean nor, boolean base128)
-