Package gorsat.process
Class SparkRowSource
- java.lang.Object
-
- org.gorpipe.gor.model.GenomicIteratorBase
-
- gorsat.process.ProcessSource
-
- gorsat.process.SparkRowSource
-
- All Implemented Interfaces:
java.lang.AutoCloseable, java.util.Iterator<org.gorpipe.gor.model.Row>, org.gorpipe.gor.model.GenomicIterator, org.gorpipe.gor.model.RowSourceStats
public class SparkRowSource extends gorsat.process.ProcessSource
Created by sigmar on 12/02/16.
-
-
Constructor Summary
Constructors
Constructor | Description
SparkRowSource(java.lang.String[] cmds, java.lang.String type, boolean nor, org.gorpipe.gor.session.GorSession gpSession, java.lang.String chr, int pos, int end, int bs)
SparkRowSource(java.lang.String sql, java.lang.String profile, java.lang.String parquet, java.lang.String type, boolean nor, org.gorpipe.spark.GorSparkSession gpSession, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, java.lang.String chr, int pos, int end, boolean usestreaming, java.lang.String jobId, boolean useCpp, java.lang.String parts, int buckets, boolean tag, java.lang.String ddl, java.lang.String format, java.lang.String option)
-
Method Summary
All Methods | Static Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> analyse(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, java.lang.String gor)
java.lang.String checkNested(java.lang.String cmd, org.gorpipe.gor.session.GorSession gpSession, java.lang.String[] errorStr)
static boolean checkNor(org.apache.spark.sql.types.StructField[] fields)
org.apache.spark.sql.Dataset<? extends org.gorpipe.gor.model.Row> checkRowFormat(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset)
void close()
org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> getDataset()
java.lang.String getHeader()
void gor()
void gorpipe(gorsat.Commands.Analysis pipeStep, boolean gor)
static org.apache.spark.sql.Dataset<org.gorpipe.gor.model.Row> gorpipe(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset, java.lang.String gor)
boolean hasNext()
void init()
boolean isBuffered()
boolean isNor()
org.gorpipe.gor.model.Row next()
boolean pushdownCalc(java.lang.String formula, java.lang.String colName)
boolean pushdownCmd(java.lang.String cmd)
boolean pushdownFilter(java.lang.String gorwhere)
boolean pushdownGor(java.lang.String gor)
boolean pushdownSelect(java.lang.String[] cols)
boolean pushdownTop(int limit)
boolean pushdownWrite(java.lang.String filename)
static org.apache.spark.sql.types.StructType schemaFromRow(java.lang.String[] header, org.gorpipe.gor.model.Row row)
boolean seek(java.lang.String seekChr, int seekPos)
java.io.InputStream setRange(java.lang.String seekChr, int startPos, int endPos)
-
Methods inherited from class org.gorpipe.gor.model.GenomicIteratorBase
clone, decStat, getBufferSize, getContext, getSourceName, getTypes, incStat, init, initStats, isSourceAlreadyInserted, setBufferSize, setContext, setHeader, setSourceAlreadyInserted, setSourceName, setTypes
-
Methods inherited from class java.lang.Object
equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
-
-
-
-
Constructor Detail
-
SparkRowSource
public SparkRowSource(java.lang.String sql, java.lang.String profile, java.lang.String parquet, java.lang.String type, boolean nor, org.gorpipe.spark.GorSparkSession gpSession, java.lang.String filter, java.lang.String filterFile, java.lang.String filterColumn, java.lang.String splitFile, java.lang.String chr, int pos, int end, boolean usestreaming, java.lang.String jobId, boolean useCpp, java.lang.String parts, int buckets, boolean tag, java.lang.String ddl, java.lang.String format, java.lang.String option) throws java.io.IOException, java.util.zip.DataFormatException
- Throws:
java.io.IOException
java.util.zip.DataFormatException
-
SparkRowSource
public SparkRowSource(java.lang.String[] cmds, java.lang.String type, boolean nor, org.gorpipe.gor.session.GorSession gpSession, java.lang.String chr, int pos, int end, int bs)
-
-
Method Detail
-
init
public void init()
-
isNor
public boolean isNor()
-
getDataset
public org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> getDataset()
-
gorpipe
public void gorpipe(gorsat.Commands.Analysis pipeStep, boolean gor)
-
gorpipe
public static org.apache.spark.sql.Dataset<org.gorpipe.gor.model.Row> gorpipe(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset, java.lang.String gor)
-
gor
public void gor()
-
schemaFromRow
public static org.apache.spark.sql.types.StructType schemaFromRow(java.lang.String[] header, org.gorpipe.gor.model.Row row)
-
checkNested
public java.lang.String checkNested(java.lang.String cmd, org.gorpipe.gor.session.GorSession gpSession, java.lang.String[] errorStr)
-
checkNor
public static boolean checkNor(org.apache.spark.sql.types.StructField[] fields)
-
hasNext
public boolean hasNext()
-
next
public org.gorpipe.gor.model.Row next()
-
seek
public boolean seek(java.lang.String seekChr, int seekPos)
-
close
public void close()
-
setRange
public java.io.InputStream setRange(java.lang.String seekChr, int startPos, int endPos)
- Specified by:
setRange in class gorsat.process.ProcessSource
-
getHeader
public java.lang.String getHeader()
- Specified by:
getHeader in interface org.gorpipe.gor.model.GenomicIterator
- Overrides:
getHeader in class org.gorpipe.gor.model.GenomicIteratorBase
-
isBuffered
public boolean isBuffered()
-
checkRowFormat
public org.apache.spark.sql.Dataset<? extends org.gorpipe.gor.model.Row> checkRowFormat(org.apache.spark.sql.Dataset<? extends org.apache.spark.sql.Row> dataset)
-
pushdownFilter
public boolean pushdownFilter(java.lang.String gorwhere)
-
pushdownCalc
public boolean pushdownCalc(java.lang.String formula, java.lang.String colName)
-
pushdownSelect
public boolean pushdownSelect(java.lang.String[] cols)
-
pushdownWrite
public boolean pushdownWrite(java.lang.String filename)
-
pushdownCmd
public boolean pushdownCmd(java.lang.String cmd)
-
analyse
public static org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> analyse(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, java.lang.String gor)
-
pushdownGor
public boolean pushdownGor(java.lang.String gor)
-
pushdownTop
public boolean pushdownTop(int limit)
-
-