Package gorsat.spark
Class GorFileFormat
- java.lang.Object
  - org.apache.spark.sql.execution.datasources.TextBasedFileFormat
    - org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
      - gorsat.spark.GorFileFormat
All Implemented Interfaces:
java.io.Serializable, org.apache.spark.sql.execution.datasources.FileFormat, org.apache.spark.sql.sources.DataSourceRegister
public class GorFileFormat
extends org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
implements java.io.Serializable

See Also:
- Serialized Form
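Because GorFileFormat implements org.apache.spark.sql.sources.DataSourceRegister and org.apache.spark.sql.execution.datasources.FileFormat, it can be addressed through Spark's ordinary DataFrameReader. A minimal Java sketch, assuming the class is on the application classpath; the input path example.gorz is a placeholder, not a file shipped with this documentation:

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class GorReadSketch {
        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                    .appName("gor-read-sketch")
                    .master("local[*]")
                    .getOrCreate();

            // The fully qualified class name is passed as the format identifier;
            // "example.gorz" is a placeholder input path.
            Dataset<Row> rows = spark.read()
                    .format("gorsat.spark.GorFileFormat")
                    .load("example.gorz");

            rows.show(10);
            spark.stop();
        }
    }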
Constructor Summary
Constructors:
- GorFileFormat()
Method Summary
All methods are concrete instance methods.

- scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReader(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.hadoop.conf.Configuration hadoopConf)
- scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReaderWithPartitionValues(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.hadoop.conf.Configuration hadoopConf)
- scala.Option<org.apache.spark.sql.types.StructType> inferSchema(org.apache.spark.sql.SparkSession session, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files)
- boolean isSplitable(org.apache.spark.sql.SparkSession sparkSession, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.hadoop.fs.Path path)
- org.apache.spark.sql.execution.datasources.OutputWriterFactory prepareWrite(org.apache.spark.sql.SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.spark.sql.types.StructType dataSchema)
- java.lang.String shortName()
- boolean supportBatch(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema)
- boolean supportDataType(org.apache.spark.sql.types.DataType dataType)
- scala.Option<scala.collection.Seq<java.lang.String>> vectorTypes(org.apache.spark.sql.types.StructType requiredSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.internal.SQLConf sqlConf)
Method Detail
inferSchema
public scala.Option<org.apache.spark.sql.types.StructType> inferSchema(org.apache.spark.sql.SparkSession session, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files)
- Specified by: inferSchema in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: inferSchema in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
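When no schema is supplied to the reader, Spark calls inferSchema with the files matched by the load path. A hedged Java fragment, reusing the spark session and placeholder path from the sketch above, that surfaces the inferred schema:

    // Fragment reusing `spark` from the read sketch above; "example.gorz" is a placeholder.
    Dataset<Row> rows = spark.read()
            .format("gorsat.spark.GorFileFormat")
            .load("example.gorz");          // no explicit schema, so inferSchema(...) runs

    rows.printSchema();                      // prints the inferred schema
    org.apache.spark.sql.types.StructType inferred = rows.schema();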
prepareWrite
public org.apache.spark.sql.execution.datasources.OutputWriterFactory prepareWrite(org.apache.spark.sql.SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.spark.sql.types.StructType dataSchema)
- Specified by: prepareWrite in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: prepareWrite in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
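prepareWrite supplies the OutputWriterFactory that write tasks use to produce output files. A hedged fragment of the write path, reusing the rows Dataset from the sketches above; out.gorz is a placeholder output path, and which schemas the writer accepts is not documented on this page:

    // Fragment reusing `rows` from the sketches above; "out.gorz" is a placeholder path.
    rows.write()
        .format("gorsat.spark.GorFileFormat")
        .mode("overwrite")
        .save("out.gorz");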
supportBatch
public boolean supportBatch(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema)
- Specified by: supportBatch in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: supportBatch in class org.apache.spark.sql.execution.datasources.TextBasedFileFormat
vectorTypes
public scala.Option<scala.collection.Seq<java.lang.String>> vectorTypes(org.apache.spark.sql.types.StructType requiredSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.internal.SQLConf sqlConf)
- Specified by: vectorTypes in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: vectorTypes in class org.apache.spark.sql.execution.datasources.TextBasedFileFormat
isSplitable
public boolean isSplitable(org.apache.spark.sql.SparkSession sparkSession, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.hadoop.fs.Path path)
- Specified by: isSplitable in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: isSplitable in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
buildReader
public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReader(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.hadoop.conf.Configuration hadoopConf)
- Specified by: buildReader in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: buildReader in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
buildReaderWithPartitionValues
public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReaderWithPartitionValues(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<java.lang.String,java.lang.String> options, org.apache.hadoop.conf.Configuration hadoopConf)
- Specified by: buildReaderWithPartitionValues in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: buildReaderWithPartitionValues in class org.apache.spark.sql.execution.datasources.TextBasedFileFormat
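Both reader methods receive the query's translatable predicates as org.apache.spark.sql.sources.Filter values; whether this format uses them to prune rows is not stated on this page. A hedged fragment that would cause such filters to be passed down (the column name Chrom is hypothetical):

    // Fragment reusing `rows` from the read sketch above; the column name "Chrom"
    // is hypothetical and must exist in the actual schema.
    Dataset<Row> filtered = rows.filter("Chrom = 'chr1'");
    filtered.show(5);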
supportDataType
public boolean supportDataType(org.apache.spark.sql.types.DataType dataType)
- Specified by: supportDataType in interface org.apache.spark.sql.execution.datasources.FileFormat
- Overrides: supportDataType in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
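Since the class has a public no-argument constructor (see Constructor Summary), supportDataType can be probed directly; a minimal sketch:

    // Direct probe of supportDataType using the documented no-arg constructor.
    gorsat.spark.GorFileFormat fmt = new gorsat.spark.GorFileFormat();
    boolean stringsSupported = fmt.supportDataType(org.apache.spark.sql.types.DataTypes.StringType);
    System.out.println("StringType supported: " + stringsSupported);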
shortName
public java.lang.String shortName()
- Specified by: shortName in interface org.apache.spark.sql.sources.DataSourceRegister
- Overrides: shortName in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
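The value returned by shortName() is the alias registered through DataSourceRegister and can stand in for the fully qualified class name in format(). The actual value is not shown on this page; the sketch below assumes it is "gor":

    // Hypothetical: assumes shortName() returns "gor"; verify against the implementation.
    Dataset<Row> byAlias = spark.read()
            .format("gor")
            .load("example.gorz");   // placeholder path from the earlier sketches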