Package gorsat.process
Class SparkRowUtilities
- java.lang.Object
-
- gorsat.process.SparkRowUtilities
-
public class SparkRowUtilities extends java.lang.Object
-
-
Constructor Summary
Constructors
Constructor | Description
SparkRowUtilities() |
-
Method Summary
All Methods | Static Methods | Concrete Methods
Modifier and Type | Method | Description
static java.util.List<java.lang.String> | createMapList(java.util.Map<java.lang.String,java.lang.String> createMap, java.util.Map<java.lang.String,java.lang.String> defMap, java.lang.String creates) |
static java.lang.String | createMapString(java.util.Map<java.lang.String,java.lang.String> createMap, java.util.Map<java.lang.String,java.lang.String> defMap, java.lang.String creates) |
static java.lang.String | generateTempViewName(java.lang.String fileName, boolean usegorpipe, java.lang.String filter, java.lang.String chr, int pos, int end) |
static java.lang.String | generateTempViewName(java.lang.String fileName, boolean usegorpipe, java.lang.String filter, java.lang.String chr, int pos, int end, java.util.List<java.time.Instant> inst) |
static java.util.function.Predicate<java.lang.String> | getFileEndingPredicate() |
static java.util.function.Predicate<java.lang.String> | getFileEndingTimestampPredicate() |
static org.apache.spark.sql.types.StructType | gor2Schema(java.lang.String header, org.gorpipe.gor.model.Row types) |
static GorDataType | gorCmdSchema(java.lang.String[] gorcmds, org.gorpipe.spark.GorSparkSession gorSparkSession) |
static GorDataType | gorCmdSchema(java.lang.String gorcmd, org.gorpipe.spark.GorSparkSession gorSparkSession, boolean nor) |
static org.apache.spark.sql.types.StructType | gorDataTypeToStructType(GorDataType gorDataType) |
static GorDataType | inferDataTypes(java.io.InputStream is, java.lang.String fileName, boolean isGorz, boolean nor) |
static GorDataType | inferDataTypes(org.gorpipe.gor.model.FileReader fileReader, java.lang.String fileName, boolean isGorz, boolean nor) |
static org.apache.spark.sql.types.StructType | inferSchema(java.io.InputStream fileStream, java.lang.String fileName, boolean nor, boolean isGorz) |
static org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> | registerFile(java.lang.String[] fns, java.lang.String name, java.lang.String profile, org.gorpipe.spark.GorSparkSession gorSparkSession, java.lang.String standalone, java.lang.String cacheDir, boolean usestreaming, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, boolean nor, java.lang.String chr, int pos, int end, java.lang.String jobid, java.lang.String cacheFile, boolean cpp, boolean tag, org.apache.spark.sql.types.StructType schema, java.util.Map<java.lang.String,java.lang.String> readOptions) |
static RowDataType | translatePath(java.lang.String fn, java.lang.String standalone, org.gorpipe.gor.model.DriverBackedFileReader fileReader) |
static GorDataType | typeFromParquetLine(org.gorpipe.gor.model.ParquetLine pl, boolean withStart, java.lang.String[] header) |
static GorDataType | typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor) |
static GorDataType | typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor, boolean base128) |
-
-
-
Method Detail
-
getFileEndingPredicate
public static java.util.function.Predicate<java.lang.String> getFileEndingPredicate()
-
getFileEndingTimestampPredicate
public static java.util.function.Predicate<java.lang.String> getFileEndingTimestampPredicate()
-
createMapString
public static java.lang.String createMapString(java.util.Map<java.lang.String,java.lang.String> createMap, java.util.Map<java.lang.String,java.lang.String> defMap, java.lang.String creates)
-
createMapList
public static java.util.List<java.lang.String> createMapList(java.util.Map<java.lang.String,java.lang.String> createMap, java.util.Map<java.lang.String,java.lang.String> defMap, java.lang.String creates)
-
generateTempViewName
public static java.lang.String generateTempViewName(java.lang.String fileName, boolean usegorpipe, java.lang.String filter, java.lang.String chr, int pos, int end)
-
generateTempViewName
public static java.lang.String generateTempViewName(java.lang.String fileName, boolean usegorpipe, java.lang.String filter, java.lang.String chr, int pos, int end, java.util.List<java.time.Instant> inst)
-
gor2Schema
public static org.apache.spark.sql.types.StructType gor2Schema(java.lang.String header, org.gorpipe.gor.model.Row types)
-
gorDataTypeToStructType
public static org.apache.spark.sql.types.StructType gorDataTypeToStructType(GorDataType gorDataType)
-
inferSchema
public static org.apache.spark.sql.types.StructType inferSchema(java.io.InputStream fileStream, java.lang.String fileName, boolean nor, boolean isGorz) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
translatePath
public static RowDataType translatePath(java.lang.String fn, java.lang.String standalone, org.gorpipe.gor.model.DriverBackedFileReader fileReader) throws java.io.IOException
- Throws:
java.io.IOException
-
gorCmdSchema
public static GorDataType gorCmdSchema(java.lang.String gorcmd, org.gorpipe.spark.GorSparkSession gorSparkSession, boolean nor)
-
gorCmdSchema
public static GorDataType gorCmdSchema(java.lang.String[] gorcmds, org.gorpipe.spark.GorSparkSession gorSparkSession)
-
registerFile
public static org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> registerFile(java.lang.String[] fns, java.lang.String name, java.lang.String profile, org.gorpipe.spark.GorSparkSession gorSparkSession, java.lang.String standalone, java.lang.String cacheDir, boolean usestreaming, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, boolean nor, java.lang.String chr, int pos, int end, java.lang.String jobid, java.lang.String cacheFile, boolean cpp, boolean tag, org.apache.spark.sql.types.StructType schema, java.util.Map<java.lang.String,java.lang.String> readOptions) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
inferDataTypes
public static GorDataType inferDataTypes(org.gorpipe.gor.model.FileReader fileReader, java.lang.String fileName, boolean isGorz, boolean nor) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
inferDataTypes
public static GorDataType inferDataTypes(java.io.InputStream is, java.lang.String fileName, boolean isGorz, boolean nor) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
typeFromParquetLine
public static GorDataType typeFromParquetLine(org.gorpipe.gor.model.ParquetLine pl, boolean withStart, java.lang.String[] header)
-
typeFromStream
public static GorDataType typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor)
-
typeFromStream
public static GorDataType typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor, boolean base128)
-
-