Package gorsat.process
Class SparkRowSource
java.lang.Object
org.gorpipe.gor.model.GenomicIteratorBase
gorsat.process.ProcessSource
gorsat.process.SparkRowSource
- All Implemented Interfaces:
AutoCloseable, Iterator<org.gorpipe.gor.model.Row>, org.gorpipe.gor.model.GenomicIterator, org.gorpipe.gor.model.RowSourceStats
public class SparkRowSource
extends gorsat.process.ProcessSource
Created by sigmar on 12/02/16.
-
Field Summary
Fields inherited from class org.gorpipe.gor.model.GenomicIteratorBase
statsSenderAnnotation, statsSenderName -
Constructor Summary
Constructors
Constructor | Description
SparkRowSource(String[] cmds, String type, boolean nor, org.gorpipe.gor.session.GorSession gpSession, String chr, int pos, int end, int bs)
SparkRowSource(String sql, String profile, String parquet, String type, boolean nor, org.gorpipe.spark.GorSparkSession gpSession, String filter, String filterFile, String filterColumn, String splitFile, String chr, int pos, int end, boolean usestreaming, String jobId, boolean useCpp, String parts, int buckets, boolean tag, String ddl, String format, String option)
-
Method Summary
Modifier and Type | Method | Description
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> checkNested(String cmd, org.gorpipe.gor.session.GorSession gpSession, String[] errorStr)
static boolean checkNor(org.apache.spark.sql.types.StructField[] fields)
org.apache.spark.sql.Dataset<? extends org.gorpipe.gor.model.Row> checkRowFormat(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset)
void close()
org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> getDataset()
void gor()
void gorpipe(gorsat.Commands.Analysis pipeStep, boolean gor)
static org.apache.spark.sql.Dataset<org.gorpipe.gor.model.Row> gorpipe(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset, String gor)
boolean hasNext()
void init()
boolean isBuffered()
boolean isNor()
org.gorpipe.gor.model.Row next()
boolean pushdownCalc(String formula, String colName)
boolean pushdownCmd(String cmd)
boolean pushdownFilter(String gorwhere)
boolean pushdownGor(String gor)
boolean pushdownSelect(String[] cols)
boolean pushdownTop(int limit)
boolean pushdownWrite(String filename)
static org.apache.spark.sql.types.StructType schemaFromRow(String[] header, org.gorpipe.gor.model.Row row)
boolean
Methods inherited from class org.gorpipe.gor.model.GenomicIteratorBase
clone, decStat, getBufferSize, getContext, getSourceName, getTypes, incStat, init, initStats, isSourceAlreadyInserted, setBufferSize, setContext, setHeader, setSourceAlreadyInserted, setSourceName, setTypes
Methods inherited from class java.lang.Object
equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.gorpipe.gor.model.GenomicIterator
filter, getMonitor, moveToPosition, moveToPosition, seek, select, setRequestedRange
Methods inherited from interface java.util.Iterator
forEachRemaining, remove
Methods inherited from interface org.gorpipe.gor.model.RowSourceStats
getAvgBasesPerMilliSecond, getAvgBatchSize, getAvgRowsPerMilliSecond, getAvgSeekTimeMilliSecond, getCurrentBatchLoc, getCurrentBatchRow, getCurrentBatchSize
-
Constructor Details
-
SparkRowSource
public SparkRowSource(String sql, String profile, String parquet, String type, boolean nor, org.gorpipe.spark.GorSparkSession gpSession, String filter, String filterFile, String filterColumn, String splitFile, String chr, int pos, int end, boolean usestreaming, String jobId, boolean useCpp, String parts, int buckets, boolean tag, String ddl, String format, String option) throws IOException, DataFormatException - Throws:
IOException
DataFormatException
-
SparkRowSource
-
-
Method Details
-
init
public void init() -
isNor
public boolean isNor() -
getDataset
public org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> getDataset() -
gorpipe
public void gorpipe(gorsat.Commands.Analysis pipeStep, boolean gor) -
gorpipe
public static org.apache.spark.sql.Dataset<org.gorpipe.gor.model.Row> gorpipe(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset, String gor) -
gor
public void gor() -
schemaFromRow
public static org.apache.spark.sql.types.StructType schemaFromRow(String[] header, org.gorpipe.gor.model.Row row) -
checkNested
-
checkNor
public static boolean checkNor(org.apache.spark.sql.types.StructField[] fields) -
hasNext
public boolean hasNext() -
next
public org.gorpipe.gor.model.Row next() -
seek
-
close
public void close() -
setRange
- Specified by:
setRange in class gorsat.process.ProcessSource
-
getHeader
- Specified by:
getHeader in interface org.gorpipe.gor.model.GenomicIterator
- Overrides:
getHeader in class org.gorpipe.gor.model.GenomicIteratorBase
-
isBuffered
public boolean isBuffered() -
checkRowFormat
public org.apache.spark.sql.Dataset<? extends org.gorpipe.gor.model.Row> checkRowFormat(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset) -
pushdownFilter
-
pushdownCalc
-
pushdownSelect
-
pushdownWrite
-
pushdownCmd
-
analyse
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> analyse(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, String gor) -
pushdownGor
-
pushdownTop
public boolean pushdownTop(int limit)
-