package icu.wuhufly.dws

import icu.wuhufly.SparkHandler
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Batch job that finds, for every machine factory, the machine(s) whose total
 * running time is the median of that factory, and writes them out through
 * `SparkHandler.writeIntoCK` under the name "machine_running_median".
 *
 * Pipeline (all inside database `hudi_gy_dwd`):
 *  1. Join `fact_change_record` with `dim_machine` and keep finished records
 *     (`ChangeEndTime is not null`) whose state is '运行'.
 *  2. Per record, running time = end timestamp - start timestamp, both cast to
 *     long (Spark casts timestamps to epoch seconds).
 *  3. Sum the running time per (machine, factory).
 *  4. Rank machines inside each factory and keep the middle row for an odd
 *     number of machines, or the two middle rows for an even number.
 *
 * Output columns: `machine_id`, `machine_factory`, `total_running_time`.
 */
object machine_running_median02 {

  // Median selection uses ONE ascending rank plus the factory's machine count.
  //
  // The rank is made total by the `machine_id` tie-breaker: row_number() assigns
  // arbitrary positions to rows with equal sort keys, so comparing two
  // independently computed rankings (asc vs. desc) can miss the median row, or
  // pick different rows on different runs, whenever two machines of a factory
  // have the same total running time.
  //
  // With n = factory_cnt and r = rn_asc, the middle positions satisfy
  //   n odd : 2r = n + 1
  //   n even: 2r = n  or  2r = n + 2
  // and because 2r is always even, both cases collapse into
  //   n <= 2r <= n + 2.
  private val medianSql: String =
    """
      |select machine_id, machine_factory, duration_time as total_running_time
      |from (
      |  select machine_id, machine_factory, duration_time,
      |    row_number() over(partition by machine_factory
      |                      order by duration_time asc, machine_id asc) as rn_asc,
      |    count(*) over(partition by machine_factory) as factory_cnt
      |  from (
      |    select machine_id, machine_factory, sum(duration_time) as duration_time
      |    from (
      |      select t1.ChangeMachineID as machine_id,
      |        cast(to_timestamp(t1.ChangeEndTime, 'yyyy-MM-dd HH:mm:ss') as long) -
      |          cast(to_timestamp(t1.ChangeStartTime, 'yyyy-MM-dd HH:mm:ss') as long) as duration_time,
      |        t2.MachineFactory as machine_factory
      |      from fact_change_record t1
      |      join dim_machine t2
      |        on t1.ChangeMachineID = t2.BaseMachineID
      |      where t1.ChangeEndTime is not null and t1.ChangeRecordState = '运行'
      |    ) run_records
      |    group by machine_id, machine_factory
      |  ) per_machine
      |) ranked
      |where 2 * rn_asc between factory_cnt and factory_cnt + 2
      |""".stripMargin

  /**
   * Entry point: runs the median query and hands the result to the handler.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val handler: SparkHandler = SparkHandler.of()
    val spark: SparkSession = handler.getSpark()

    spark.sql("use hudi_gy_dwd")
    val df: DataFrame = spark.sql(medianSql)

    // NOTE(review): the third argument is presumably the ordering/primary key
    // of the target table — confirm against SparkHandler.writeIntoCK.
    handler.writeIntoCK(
      "machine_running_median", df, "machine_id"
    )
  }
}
