Package gorsat.spark

Class GorFileFormat

java.lang.Object
  org.apache.spark.sql.execution.datasources.TextBasedFileFormat
    org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
      gorsat.spark.GorFileFormat
All Implemented Interfaces:
java.io.Serializable, org.apache.spark.sql.execution.datasources.FileFormat, org.apache.spark.sql.sources.DataSourceRegister

public class GorFileFormat
extends org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
implements java.io.Serializable
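GorFileFormat wires GOR files into Spark's file-based data source machinery by specializing CSVFileFormat. A minimal read sketch, assuming the format registers under the short name "gor" (this page does not state the actual alias; see shortName() below) and using a hypothetical input path:

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class GorReadExample {
        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                    .appName("gor-read")
                    .master("local[*]")
                    .getOrCreate();

            // "gor" is an assumed alias; shortName() returns the real one.
            Dataset<Row> rows = spark.read()
                    .format("gor")
                    .load("data.gor"); // hypothetical path

            rows.printSchema();
            rows.show(5);
            spark.stop();
        }
    }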
  • Constructor Summary

    Constructors
    GorFileFormat()
  • Method Summary

    Modifier and Type / Method

    scala.Function1<PartitionedFile, scala.collection.Iterator<InternalRow>>
        buildReader(SparkSession sparkSession, StructType dataSchema, StructType partitionSchema, StructType requiredSchema, Seq<Filter> filters, Map<String, String> options, Configuration hadoopConf)

    scala.Function1<PartitionedFile, scala.collection.Iterator<InternalRow>>
        buildReaderWithPartitionValues(SparkSession sparkSession, StructType dataSchema, StructType partitionSchema, StructType requiredSchema, Seq<Filter> filters, Map<String, String> options, Configuration hadoopConf)

    scala.Option<StructType>
        inferSchema(SparkSession sparkSession, Map<String, String> options, Seq<FileStatus> files)

    boolean
        isSplitable(SparkSession sparkSession, Map<String, String> options, Path path)

    OutputWriterFactory
        prepareWrite(SparkSession sparkSession, Job job, Map<String, String> options, StructType dataSchema)

    String
        shortName()

    boolean
        supportBatch(SparkSession sparkSession, StructType dataSchema)

    boolean
        supportDataType(DataType dataType)

    scala.Option<Seq<String>>
        vectorTypes(StructType requiredSchema, StructType partitionSchema, SQLConf sqlConf)

    Methods inherited from class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat

    equals, hashCode, toString

    Methods inherited from class java.lang.Object

    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
  • Constructor Details

    • GorFileFormat

      public GorFileFormat()

  • Method Details

    • inferSchema

      public scala.Option<org.apache.spark.sql.types.StructType> inferSchema(
              org.apache.spark.sql.SparkSession sparkSession,
              scala.collection.immutable.Map<java.lang.String,java.lang.String> options,
              scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files)
      Specified by:
      inferSchema in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      inferSchema in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
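
      Spark consults inferSchema when DataFrameReader.load runs without an explicit schema; passing one via schema(...) bypasses inference. A sketch under the same assumptions as above (alias "gor", hypothetical path and column names), reusing `spark` from the read sketch:

          import org.apache.spark.sql.Dataset;
          import org.apache.spark.sql.Row;
          import org.apache.spark.sql.types.DataTypes;
          import org.apache.spark.sql.types.StructType;

          // No schema supplied: Spark hands the input's FileStatus list
          // to GorFileFormat.inferSchema.
          Dataset<Row> inferred = spark.read().format("gor").load("data.gor");

          // Explicit schema: inference is skipped. Columns are hypothetical.
          StructType schema = new StructType()
                  .add("Chrom", DataTypes.StringType)
                  .add("Pos", DataTypes.IntegerType);
          Dataset<Row> declared = spark.read().format("gor")
                  .schema(schema).load("data.gor");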
    • prepareWrite

      public org.apache.spark.sql.execution.datasources.OutputWriterFactory prepareWrite(
              org.apache.spark.sql.SparkSession sparkSession,
              org.apache.hadoop.mapreduce.Job job,
              scala.collection.immutable.Map<java.lang.String,java.lang.String> options,
              org.apache.spark.sql.types.StructType dataSchema)
      Specified by:
      prepareWrite in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      prepareWrite in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
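
      prepareWrite runs once on the driver when a Dataset is saved through this format; the OutputWriterFactory it returns then creates one writer per task. A write-path sketch, reusing `rows` from the read sketch above:

          rows.write()
              .format("gor")
              .mode("overwrite")
              .save("out"); // hypothetical output directory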
    • supportBatch

      public boolean supportBatch(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema)
      Specified by:
      supportBatch in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      supportBatch in class org.apache.spark.sql.execution.datasources.TextBasedFileFormat
    • vectorTypes

      public scala.Option<scala.collection.Seq<java.lang.String>> vectorTypes(
              org.apache.spark.sql.types.StructType requiredSchema,
              org.apache.spark.sql.types.StructType partitionSchema,
              org.apache.spark.sql.internal.SQLConf sqlConf)
      Specified by:
      vectorTypes in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      vectorTypes in class org.apache.spark.sql.execution.datasources.TextBasedFileFormat
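
      supportBatch and vectorTypes jointly control the columnar read path: the former says whether scans over a given schema may return whole ColumnarBatches, the latter (when defined) names the ColumnVector classes backing each column. A direct-call sketch; whether GorFileFormat ever enables the batch path is not stated on this page. `spark` is the SparkSession from the read sketch:

          import gorsat.spark.GorFileFormat;
          import org.apache.spark.sql.internal.SQLConf;
          import org.apache.spark.sql.types.DataTypes;
          import org.apache.spark.sql.types.StructType;

          GorFileFormat format = new GorFileFormat();
          StructType schema = new StructType()
                  .add("Chrom", DataTypes.StringType) // hypothetical columns
                  .add("Pos", DataTypes.IntegerType);

          // May scans over this schema use the columnar (batch) path?
          boolean batch = format.supportBatch(spark, schema);

          // If so, which ColumnVector classes back each column?
          scala.Option<scala.collection.Seq<String>> vectors =
                  format.vectorTypes(schema, new StructType(), new SQLConf());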
    • isSplitable

      public boolean isSplitable(
              org.apache.spark.sql.SparkSession sparkSession,
              scala.collection.immutable.Map<java.lang.String,java.lang.String> options,
              org.apache.hadoop.fs.Path path)
      Specified by:
      isSplitable in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      isSplitable in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
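
      isSplitable decides whether one input file may be carved into several read partitions; text-based formats typically answer false for files compressed with non-splittable codecs. A direct-call sketch, reusing `spark` and `format` from above (path hypothetical):

          import org.apache.hadoop.fs.Path;
          import scala.collection.immutable.Map;
          import scala.collection.immutable.Map$;

          // Map$.MODULE$ is the Java view of Scala's Map companion object.
          Map<String, String> noOptions = Map$.MODULE$.<String, String>empty();
          boolean splitable =
                  format.isSplitable(spark, noOptions, new Path("data.gor"));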
    • buildReader

      public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReader(
              org.apache.spark.sql.SparkSession sparkSession,
              org.apache.spark.sql.types.StructType dataSchema,
              org.apache.spark.sql.types.StructType partitionSchema,
              org.apache.spark.sql.types.StructType requiredSchema,
              scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters,
              scala.collection.immutable.Map<java.lang.String,java.lang.String> options,
              org.apache.hadoop.conf.Configuration hadoopConf)
      Specified by:
      buildReader in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      buildReader in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
    • buildReaderWithPartitionValues

      public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReaderWithPartitionValues(
              org.apache.spark.sql.SparkSession sparkSession,
              org.apache.spark.sql.types.StructType dataSchema,
              org.apache.spark.sql.types.StructType partitionSchema,
              org.apache.spark.sql.types.StructType requiredSchema,
              scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters,
              scala.collection.immutable.Map<java.lang.String,java.lang.String> options,
              org.apache.hadoop.conf.Configuration hadoopConf)
      Specified by:
      buildReaderWithPartitionValues in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      buildReaderWithPartitionValues in class org.apache.spark.sql.execution.datasources.TextBasedFileFormat
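
      buildReader and buildReaderWithPartitionValues are planner hooks rather than user-facing API: each returns a closure that Spark ships to executors and applies once per PartitionedFile in a scan. A driver-side sketch of merely obtaining the closure, reusing `spark`, `format`, and `schema` from above; the empty filters, options, and Configuration are placeholders for what the resolved scan would supply:

          import org.apache.hadoop.conf.Configuration;
          import org.apache.spark.sql.catalyst.InternalRow;
          import org.apache.spark.sql.execution.datasources.PartitionedFile;
          import org.apache.spark.sql.sources.Filter;
          import org.apache.spark.sql.types.StructType;
          import scala.Function1;
          import scala.collection.Iterator;
          import scala.collection.Seq;
          import scala.collection.Seq$;
          import scala.collection.immutable.Map$;

          Seq<Filter> noFilters = Seq$.MODULE$.<Filter>empty();
          Function1<PartitionedFile, Iterator<InternalRow>> reader =
                  format.buildReaderWithPartitionValues(
                          spark, schema, new StructType(), schema,
                          noFilters, Map$.MODULE$.<String, String>empty(),
                          new Configuration()); // a real scan passes the session's Hadoop conf
          // Spark, not user code, applies `reader` to each PartitionedFile.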
    • supportDataType

      public boolean supportDataType(org.apache.spark.sql.types.DataType dataType)
      Specified by:
      supportDataType in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      supportDataType in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
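
      supportDataType gates which Catalyst column types the format accepts on read and write; CSV-based formats reject nested types such as arrays and structs, and this page does not say whether the override widens or narrows that. A probe sketch reusing `format` from above:

          import org.apache.spark.sql.types.DataTypes;

          boolean acceptsStrings = format.supportDataType(DataTypes.StringType);
          boolean acceptsArrays  = format.supportDataType(
                  DataTypes.createArrayType(DataTypes.IntegerType));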
    • shortName

      public java.lang.String shortName()
      Specified by:
      shortName in interface org.apache.spark.sql.sources.DataSourceRegister
      Overrides:
      shortName in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
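
      shortName supplies the alias under which this source is advertised to Spark's data source resolution, so callers never need the fully qualified class name:

          // The returned alias is exactly what spark.read().format(...) matches on.
          String alias = new GorFileFormat().shortName();
          spark.read().format(alias).load("data.gor"); // hypothetical path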