Package gorsat.process
Class SparkRowSource
java.lang.Object
org.gorpipe.gor.model.GenomicIterator
org.gorpipe.model.gor.iterators.RowSource
gorsat.process.ProcessSource
gorsat.process.SparkRowSource
- All Implemented Interfaces:
java.lang.AutoCloseable, java.util.Iterator<org.gorpipe.gor.model.Row>
public class SparkRowSource
extends gorsat.process.ProcessSource
Created by sigmar on 12/02/16.
-
Nested Class Summary
Nested Classes
Modifier and Type Class Description
static class SparkRowSource.GorDataType -
Constructor Summary
Constructors
Constructor Description
SparkRowSource(java.lang.String[] cmds, java.lang.String type, boolean nor, org.gorpipe.gor.session.GorSession gpSession, java.lang.String chr, int pos, int end, int bs)
SparkRowSource(java.lang.String sql, java.lang.String profile, java.lang.String parquet, java.lang.String type, boolean nor, org.gorpipe.spark.GorSparkSession gpSession, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, java.lang.String chr, int pos, int end, boolean usestreaming, java.lang.String jobId, boolean useCpp, java.lang.String parts, int buckets, boolean tag) -
Method Summary
Modifier and Type Method Description
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> analyse(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, java.lang.String gor)
static java.lang.String checkNested(java.lang.String cmd, org.gorpipe.gor.session.GorSession gpSession, java.lang.String[] errorStr)
static boolean checkNor(org.apache.spark.sql.types.StructField[] fields)
org.apache.spark.sql.Dataset<? extends org.gorpipe.gor.model.Row> checkRowFormat(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset)
void close()
static java.lang.String generateTempViewName(java.lang.String fileName, boolean usegorpipe, java.lang.String filter, java.lang.String chr, int pos, int end)
org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> getDataset()
java.lang.String getHeader()
void gor()
static org.apache.spark.sql.types.StructType gor2Schema(java.lang.String header, org.gorpipe.gor.model.Row types)
static SparkRowSource.GorDataType gorCmdSchema(java.lang.String gorcmd, org.gorpipe.spark.GorSparkSession gorSparkSession, boolean nor)
void gorpipe(gorsat.Commands.Analysis pipeStep, boolean gor)
static org.apache.spark.sql.Dataset<org.gorpipe.gor.model.Row> gorpipe(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset, java.lang.String gor)
boolean hasNext()
static SparkRowSource.GorDataType inferDataTypes(java.nio.file.Path filePath, java.lang.String fileName, boolean isGorz, boolean nor)
static org.apache.spark.sql.types.StructType inferSchema(java.nio.file.Path filePath, java.lang.String fileName, boolean nor, boolean isGorz)
void init()
boolean isBuffered()
boolean isNor()
org.gorpipe.gor.model.Row next()
boolean pushdownCalc(java.lang.String formula, java.lang.String colName)
boolean pushdownCmd(java.lang.String cmd)
boolean pushdownFilter(java.lang.String gorwhere)
boolean pushdownGor(java.lang.String gor)
boolean pushdownSelect(java.lang.String[] cols)
boolean pushdownTop(int limit)
boolean pushdownWrite(java.lang.String filename)
static org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> registerFile(java.lang.String[] fns, java.lang.String name, java.lang.String profile, org.gorpipe.spark.GorSparkSession gorSparkSession, java.lang.String standalone, java.nio.file.Path fileroot, boolean usestreaming, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, boolean nor, java.lang.String chr, int pos, int end, java.lang.String jobid, java.lang.String cacheFile, boolean cpp, boolean tag)
static org.apache.spark.sql.types.StructType schemaFromRow(java.lang.String[] header, org.gorpipe.gor.model.Row row)
void setPosition(java.lang.String seekChr, int seekPos)
java.io.InputStream setRange(java.lang.String seekChr, int startPos, int endPos)
static java.lang.String translatePath(java.lang.String fn, java.nio.file.Path fileroot, java.lang.String standalone)
static SparkRowSource.GorDataType typeFromParquetLine(org.gorpipe.gor.model.ParquetLine pl, boolean withStart, java.lang.String[] header)
static SparkRowSource.GorDataType typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor)
static SparkRowSource.GorDataType typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor, boolean base128)
Methods inherited from class org.gorpipe.model.gor.iterators.RowSource
bufferSize, bufferSize_$eq, clone, getAvgBasesPerMilliSecond, getAvgBatchSize, getAvgRowsPerMilliSecond, getAvgSeekTimeMilliSecond, getBufferSize, getCurrentBatchLoc, getCurrentBatchRow, getCurrentBatchSize, getEx, getGorHeader, getParent, moveToPosition, moveToPosition$default$3, next, parent, parent_$eq, seek, setBufferSize, setEx, setParent, terminateReading
Methods inherited from class org.gorpipe.gor.model.GenomicIterator
decStat, filter, getColnum, getContext, getLookup, getMonitor, getSourceName, incStat, init, initStats, isSourceAlreadyInserted, seek, select, selectHeader, setColnum, setContext, setHeader, setSourceAlreadyInserted, setSourceName
-
Constructor Details
-
SparkRowSource
public SparkRowSource(java.lang.String sql, java.lang.String profile, java.lang.String parquet, java.lang.String type, boolean nor, org.gorpipe.spark.GorSparkSession gpSession, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, java.lang.String chr, int pos, int end, boolean usestreaming, java.lang.String jobId, boolean useCpp, java.lang.String parts, int buckets, boolean tag) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
SparkRowSource
public SparkRowSource(java.lang.String[] cmds, java.lang.String type, boolean nor, org.gorpipe.gor.session.GorSession gpSession, java.lang.String chr, int pos, int end, int bs)
-
-
Method Details
-
inferDataTypes
public static SparkRowSource.GorDataType inferDataTypes(java.nio.file.Path filePath, java.lang.String fileName, boolean isGorz, boolean nor) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
typeFromParquetLine
public static SparkRowSource.GorDataType typeFromParquetLine(org.gorpipe.gor.model.ParquetLine pl, boolean withStart, java.lang.String[] header) -
typeFromStream
public static SparkRowSource.GorDataType typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor) -
typeFromStream
public static SparkRowSource.GorDataType typeFromStream(java.util.stream.Stream<java.lang.String> linestream, boolean withStart, java.lang.String[] headerArray, boolean nor, boolean base128) -
getDataset
public org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> getDataset() -
generateTempViewName
public static java.lang.String generateTempViewName(java.lang.String fileName, boolean usegorpipe, java.lang.String filter, java.lang.String chr, int pos, int end) -
gor2Schema
public static org.apache.spark.sql.types.StructType gor2Schema(java.lang.String header, org.gorpipe.gor.model.Row types) -
inferSchema
public static org.apache.spark.sql.types.StructType inferSchema(java.nio.file.Path filePath, java.lang.String fileName, boolean nor, boolean isGorz) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
translatePath
public static java.lang.String translatePath(java.lang.String fn, java.nio.file.Path fileroot, java.lang.String standalone) -
gorCmdSchema
public static SparkRowSource.GorDataType gorCmdSchema(java.lang.String gorcmd, org.gorpipe.spark.GorSparkSession gorSparkSession, boolean nor) -
registerFile
public static org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> registerFile(java.lang.String[] fns, java.lang.String name, java.lang.String profile, org.gorpipe.spark.GorSparkSession gorSparkSession, java.lang.String standalone, java.nio.file.Path fileroot, boolean usestreaming, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, boolean nor, java.lang.String chr, int pos, int end, java.lang.String jobid, java.lang.String cacheFile, boolean cpp, boolean tag) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
init
public void init() -
isNor
public boolean isNor() -
gorpipe
public void gorpipe(gorsat.Commands.Analysis pipeStep, boolean gor) -
gorpipe
public static org.apache.spark.sql.Dataset<org.gorpipe.gor.model.Row> gorpipe(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset, java.lang.String gor) -
gor
public void gor() -
schemaFromRow
public static org.apache.spark.sql.types.StructType schemaFromRow(java.lang.String[] header, org.gorpipe.gor.model.Row row) -
checkNested
public static java.lang.String checkNested(java.lang.String cmd, org.gorpipe.gor.session.GorSession gpSession, java.lang.String[] errorStr) -
checkNor
public static boolean checkNor(org.apache.spark.sql.types.StructField[] fields) -
hasNext
public boolean hasNext()
- Specified by:
hasNext in interface java.util.Iterator<org.gorpipe.gor.model.Row>
- Overrides:
hasNext in class org.gorpipe.gor.model.GenomicIterator
-
next
public org.gorpipe.gor.model.Row next()
- Specified by:
next in interface java.util.Iterator<org.gorpipe.gor.model.Row>
- Overrides:
next in class org.gorpipe.gor.model.GenomicIterator
-
setPosition
public void setPosition(java.lang.String seekChr, int seekPos)
- Overrides:
setPosition in class org.gorpipe.model.gor.iterators.RowSource
-
close
public void close()
- Specified by:
close in interface java.lang.AutoCloseable
- Specified by:
close in class org.gorpipe.model.gor.iterators.RowSource
-
setRange
public java.io.InputStream setRange(java.lang.String seekChr, int startPos, int endPos)
- Specified by:
setRange in class gorsat.process.ProcessSource
-
getHeader
public java.lang.String getHeader()
- Overrides:
getHeader in class org.gorpipe.gor.model.GenomicIterator
-
isBuffered
public boolean isBuffered()
- Overrides:
isBuffered in class org.gorpipe.model.gor.iterators.RowSource
-
checkRowFormat
public org.apache.spark.sql.Dataset<? extends org.gorpipe.gor.model.Row> checkRowFormat(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset) -
pushdownFilter
public boolean pushdownFilter(java.lang.String gorwhere)
- Overrides:
pushdownFilter in class org.gorpipe.gor.model.GenomicIterator
-
pushdownCalc
public boolean pushdownCalc(java.lang.String formula, java.lang.String colName)
- Overrides:
pushdownCalc in class org.gorpipe.gor.model.GenomicIterator
-
pushdownSelect
public boolean pushdownSelect(java.lang.String[] cols)
- Overrides:
pushdownSelect in class org.gorpipe.gor.model.GenomicIterator
-
pushdownWrite
public boolean pushdownWrite(java.lang.String filename)
- Overrides:
pushdownWrite in class org.gorpipe.gor.model.GenomicIterator
-
pushdownCmd
public boolean pushdownCmd(java.lang.String cmd)
- Overrides:
pushdownCmd in class org.gorpipe.gor.model.GenomicIterator
-
analyse
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> analyse(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, java.lang.String gor) -
pushdownGor
public boolean pushdownGor(java.lang.String gor)
- Overrides:
pushdownGor in class org.gorpipe.gor.model.GenomicIterator
-
pushdownTop
public boolean pushdownTop(int limit)
- Overrides:
pushdownTop in class org.gorpipe.gor.model.GenomicIterator
-