Scala 优化withColumn when子句
标签：scala、apache-spark、azure-databricks。我有以下代码，希望优化其中 withColumn 里层层嵌套的 when 子句：
/**
 * Builds the S4 completion summary for `location`.
 *
 * Full-outer-joins the frames returned by `GetTasksFor`, `GetCertsFor` and
 * `GetPTTasksFor` (each requested for "S4", aliased Tasks / Certs / PT) on
 * `SystemX` and `StageX`, then projects five columns:
 *
 *  - `SystemizationId`:    first non-null `SystemX` of Tasks, Certs, PT.
 *  - `CommissioningStage`: first non-null `StageX` of Tasks, Certs, PT.
 *  - `fPercentageClosed`:  weighted sum of the `CountX` columns, a null count
 *    being treated as 0. When `PT.SystemX` is null the weights are
 *    Tasks 0.9 + Certs 0.1; otherwise Tasks 0.6 + PT 0.3 + Certs 0.1.
 *  - `fActualStartDate`:   earliest non-null `ActualStartDateX` of the three
 *    sources; null only when all three are null.
 *  - `fActualEndDate`:     latest `ActualEndDateX`, but only when every source
 *    taking part has one. When `PT.SystemX` is null only Tasks and Certs take
 *    part; otherwise all three do. If any participating date is null the
 *    result is null.
 *
 * @param location passed unchanged to the three loader functions
 * @return a DataFrame with exactly the five columns listed above, in that order
 */
def GetCompletionForS4(location: String): DataFrame = {
  // Local import so the block does not depend on how the file imports
  // org.apache.spark.sql.functions (wildcard vs. explicit names).
  import org.apache.spark.sql.functions.{greatest, least}

  val stage = "S4"

  // CountX of the given join alias as a double, with null counted as 0.
  def countOrZero(alias: String) =
    coalesce(col(s"$alias.CountX"), lit(0)).cast("double")

  // True for rows where PT.SystemX is null (e.g. no PT row joined).
  val noPtMatch = col("PT.SystemX").isNull

  val tasksEnd = col("Tasks.ActualEndDateX")
  val certsEnd = col("Certs.ActualEndDateX")
  val ptEnd    = col("PT.ActualEndDateX")

  val percentageClosed =
    when(noPtMatch, countOrZero("Tasks") * 0.9 + countOrZero("Certs") * 0.1)
      .otherwise(countOrZero("Tasks") * 0.6 + countOrZero("PT") * 0.3 + countOrZero("Certs") * 0.1)

  // `least` skips nulls and yields null only if every input is null, which is
  // exactly what the former nested when/otherwise tree computed.
  val actualStartDate = least(
    col("Tasks.ActualStartDateX"),
    col("Certs.ActualStartDateX"),
    col("PT.ActualStartDateX")
  )

  // `greatest` also skips nulls, so the "any missing date => null" rule must be
  // enforced explicitly; a `when` without `otherwise` evaluates to null.
  val actualEndDate =
    when(noPtMatch,
      when(tasksEnd.isNotNull && certsEnd.isNotNull, greatest(tasksEnd, certsEnd)))
      .otherwise(
        when(tasksEnd.isNotNull && certsEnd.isNotNull && ptEnd.isNotNull,
          greatest(tasksEnd, certsEnd, ptEnd)))

  // NOTE(review): the PT join keys on Tasks.* only. After the first full outer
  // join, rows that exist only in Certs have null Tasks keys, so a PT row for
  // such a system/stage will not merge with them and comes out as its own row.
  // Confirm this is intended before changing the join condition.
  val joined = GetTasksFor(location, stage).as("Tasks")
    .join(
      GetCertsFor(location, stage).as("Certs"),
      col("Tasks.SystemX") === col("Certs.SystemX") && col("Tasks.StageX") === col("Certs.StageX"),
      "outer")
    .join(
      GetPTTasksFor(location, stage).as("PT"),
      col("Tasks.SystemX") === col("PT.SystemX") && col("Tasks.StageX") === col("PT.StageX"),
      "outer")

  joined.select(
    coalesce(col("Tasks.SystemX"), col("Certs.SystemX"), col("PT.SystemX")).as("SystemizationId"),
    coalesce(col("Tasks.StageX"), col("Certs.StageX"), col("PT.StageX")).as("CommissioningStage"),
    percentageClosed.as("fPercentageClosed"),
    actualStartDate.as("fActualStartDate"),
    actualEndDate.as("fActualEndDate")
  )
}
def GetCompletionForS4(位置:字符串):数据帧={
var dfSubSystem=GetTasksFor(位置,“S4”).as(“任务”)
.join(GetCertsFor(location,“S4”)。作为(“Certs”),$“Tasks.SystemX”====$“Certs.SystemX”&&&$“Tasks.StageX”====$“Certs.StageX”,“outer”)
.join(GetPTTasksFor(location,“S4”).as(“PT”),$“Tasks.SystemX”===$“PT.SystemX”和&$“Tasks.StageX”===$“PT.StageX”,“outer”)
.withColumn(“SystemizationId”、联合(col(“Tasks.SystemX”)、col(“Certs.SystemX”)、col(“PT.SystemX”))
.withColumn(“CommissioningStage”,联合(col(“Tasks.StageX”),col(“Certs.StageX”),col(“PT.StageX”))
.withColumn(“fPercentageClosed”,当((col(“PT.SystemX”))为空时,合并(col(“Tasks.CountX”)、点亮(0)).cast(“double”)*0.9+合并(col(“Certs.CountX”)、点亮(0)).cast(“double”)*0.1)
。否则(合并(列(“Tasks.CountX”)、lit(0)).cast(“double”)*0.6+合并(列(“PT.CountX”)、lit(0)).cast(“double”)*0.3+合并(列(“Certs.CountX”)、lit(0)).cast(“double”)*0.1)
)
.withColumn(“fActualStartDate”),当(col(“Tasks.ActualStartDateX”)为空时,
当(col(“Certs.ActualStartDateX”).isNull时,col(“PT.ActualStartDateX”))
.否则(
当(col(“PT.ActualStartDateX”).isNull时,col(“Certs.ActualStartDateX”))
.否则(
当(col(“Certs.ActualStartDateX”)