c

ch.cern.sparkmeasure

StageMetrics

case class StageMetrics(sparkSession: SparkSession) extends Product with Serializable

Stage Metrics: collects stage-level metrics with Stage granularity and provides aggregation and reporting functions for the end-user

Example usage for stage metrics:

    val stageMetrics = ch.cern.sparkmeasure.StageMetrics(spark)
    stageMetrics.runAndMeasure(spark.sql("select count(*) from range(1000) cross join range(1000) cross join range(1000)").show)

The tool uses Spark Listeners as its data source and collects metrics in a ListBuffer of a case class that encapsulates Spark task metrics. The ListBuffer is then transformed into a DataFrame for ease of reporting and analysis.

Stage metrics are stored in memory and used to produce a report that aggregates resource consumption; they can also be consumed "raw" (transformed into a DataFrame and/or saved to a file).

Linear Supertypes
Serializable, Product, Equals, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. StageMetrics
  2. Serializable
  3. Product
  4. Equals
  5. AnyRef
  6. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. Protected

Instance Constructors

  1. new StageMetrics(sparkSession: SparkSession)

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##: Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def aggregateStageMetrics(nameTempView: String = "PerfStageMetrics"): DataFrame
  5. def aggregateStageMetrics(): LinkedHashMap[String, Long]
  6. def aggregateStageMetricsJavaMap(): Map[String, Long]
  7. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  8. def begin(): Long
  9. var beginSnapshot: Long
  10. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.CloneNotSupportedException]) @native() @HotSpotIntrinsicCandidate()
  11. def createStageMetricsDF(nameTempView: String = "PerfStageMetrics"): DataFrame
  12. def end(): Long
  13. var endSnapshot: Long
  14. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  15. val executorMetricsNames: Array[String]
  16. final def getClass(): Class[_ <: AnyRef]
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  17. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  18. val listenerStage: StageInfoRecorderListener
  19. lazy val logger: Logger
  20. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  21. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  22. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  23. def printMemoryReport(): Unit
  24. def printReport(): Unit
  25. def productElementNames: Iterator[String]
    Definition Classes
    Product
  26. def registerListener(spark: SparkSession, listener: StageInfoRecorderListener): Unit
  27. def removeListener(): Unit
  28. def report(): String
  29. def reportMemory(): String
  30. def reportUsingDataFrame(): String
  31. def runAndMeasure[T](f: => T): T
  32. def saveData(df: DataFrame, fileName: String, fileFormat: String = "json", saveMode: String = "default"): Unit
  33. def sendReportPrometheus(serverIPnPort: String, metricsJob: String, labelName: String = sparkSession.sparkContext.appName, labelValue: String = sparkSession.sparkContext.applicationId): Unit

    Send the metrics to Prometheus.

    Send the metrics to Prometheus. Parameters: serverIPnPort: String with the Prometheus Pushgateway address, in the format hostIP:Port; metricsJob: job name; labelName: metrics label name (default: sparkSession.sparkContext.appName); labelValue: metrics label value (default: sparkSession.sparkContext.applicationId).

  34. val sparkSession: SparkSession
  35. val stageInfoVerbose: Boolean
  36. def stagesDuration(): LinkedHashMap[Int, Long]
  37. final def synchronized[T0](arg0: => T0): T0
    Definition Classes
    AnyRef
  38. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  39. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException]) @native()
  40. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.Throwable]) @Deprecated
    Deprecated

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped