Package gorsat.spark

Class GorFileFormat

java.lang.Object
org.apache.spark.sql.execution.datasources.TextBasedFileFormat
org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
gorsat.spark.GorFileFormat
All Implemented Interfaces:
Serializable, org.apache.spark.sql.execution.datasources.FileFormat, org.apache.spark.sql.sources.DataSourceRegister

public class GorFileFormat extends org.apache.spark.sql.execution.datasources.csv.CSVFileFormat implements Serializable
See Also:
  Serialized Form
  • Constructor Summary

    Constructors
    Constructor
    Description
    GorFileFormat()
     
  • Method Summary

    Modifier and Type
    Method
    Description
    scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>>
    buildReader(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.conf.Configuration hadoopConf)
     
    scala.Option<org.apache.spark.sql.types.StructType>
    inferSchema(org.apache.spark.sql.SparkSession session, scala.collection.immutable.Map<String,String> options, scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files)
     
    org.apache.spark.sql.execution.datasources.OutputWriterFactory
    prepareWrite(org.apache.spark.sql.SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<String,String> options, org.apache.spark.sql.types.StructType dataSchema)
     
    String
    shortName()
     
    boolean
    supportDataType(org.apache.spark.sql.types.DataType dataType)
     

    Methods inherited from class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat

    equals, hashCode, isSplitable, toString

    Methods inherited from class org.apache.spark.sql.execution.datasources.TextBasedFileFormat

    buildReaderWithPartitionValues, supportBatch, supportFieldName, vectorTypes

    Methods inherited from class java.lang.Object

    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
  • Constructor Details

    • GorFileFormat

      public GorFileFormat()
  • Method Details

    • inferSchema

      public scala.Option<org.apache.spark.sql.types.StructType> inferSchema(org.apache.spark.sql.SparkSession session, scala.collection.immutable.Map<String,String> options, scala.collection.Seq<org.apache.hadoop.fs.FileStatus> files)
      Specified by:
      inferSchema in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      inferSchema in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
    • prepareWrite

      public org.apache.spark.sql.execution.datasources.OutputWriterFactory prepareWrite(org.apache.spark.sql.SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<String,String> options, org.apache.spark.sql.types.StructType dataSchema)
      Specified by:
      prepareWrite in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      prepareWrite in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
    • buildReader

      public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReader(org.apache.spark.sql.SparkSession sparkSession, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.conf.Configuration hadoopConf)
      Specified by:
      buildReader in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      buildReader in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
    • supportDataType

      public boolean supportDataType(org.apache.spark.sql.types.DataType dataType)
      Specified by:
      supportDataType in interface org.apache.spark.sql.execution.datasources.FileFormat
      Overrides:
      supportDataType in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
    • shortName

      public String shortName()
      Specified by:
      shortName in interface org.apache.spark.sql.sources.DataSourceRegister
      Overrides:
      shortName in class org.apache.spark.sql.execution.datasources.csv.CSVFileFormat