Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/apache-spark/5.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Scala 优化withColumn when子句_Scala_Apache Spark_Azure Databricks - Fatal编程技术网

Scala 优化withColumn when子句

Scala 优化withColumn when子句,scala,apache-spark,azure-databricks,Scala,Apache Spark,Azure Databricks,我有以下代码 def GetCompletionForS4(location: String): DataFrame = { var dfSubSystem = GetTasksFor(location, "S4").as("Tasks") .join(GetCertsFor(location, "S4").as("Certs"),$"Tasks.SystemX" === $"Certs.SystemX" && $"Tasks.StageX" === $"Certs.Stage

我有以下代码

/**
 * Builds the S4 completion summary for the given location.
 *
 * Full-outer-joins the "S4" results of GetTasksFor, GetCertsFor and GetPTTasksFor
 * on (SystemX, StageX) and derives one row per joined key with:
 *
 *  - SystemizationId    : first non-null SystemX among Tasks, Certs, PT
 *  - CommissioningStage : first non-null StageX among Tasks, Certs, PT
 *  - fPercentageClosed  : weighted sum of the CountX columns (a missing count is 0);
 *                         weights are 0.9 Tasks + 0.1 Certs when the row has no PT match,
 *                         otherwise 0.6 Tasks + 0.3 PT + 0.1 Certs
 *  - fActualStartDate   : earliest non-null ActualStartDateX of the three sources,
 *                         null only when all three are null
 *  - fActualEndDate     : latest ActualEndDateX, but only when every source that
 *                         participates has one; otherwise null
 *
 * @param location value forwarded unchanged to the three source helpers
 * @return DataFrame with exactly the five columns listed above, in that order
 */
def GetCompletionForS4(location: String): DataFrame = {
  // Function-local import so the block stays self-contained even if the file
  // imports individual functions rather than functions._
  import org.apache.spark.sql.functions.{greatest, least}

  // CountX of one aliased source as a double, treating a missing value as 0.
  def closedCount(source: String) =
    coalesce(col(s"${source}.CountX"), lit(0)).cast("double")

  val tasksEnd = col("Tasks.ActualEndDateX")
  val certsEnd = col("Certs.ActualEndDateX")
  val ptEnd    = col("PT.ActualEndDateX")

  // True for rows where the outer join found no PT counterpart.
  val hasNoPT = col("PT.SystemX").isNull
  val tasksAndCertsEnded = tasksEnd.isNotNull && certsEnd.isNotNull

  // NOTE(review): the PT join keys on Tasks.* only. A (SystemX, StageX) pair that
  // exists in Certs and PT but not in Tasks therefore cannot match and would come
  // out as two separate rows. Left as-is to preserve behaviour; confirm whether
  // Tasks is guaranteed to contain every key, or join on the coalesced key instead.
  val joined = GetTasksFor(location, "S4").as("Tasks")
    .join(
      GetCertsFor(location, "S4").as("Certs"),
      $"Tasks.SystemX" === $"Certs.SystemX" && $"Tasks.StageX" === $"Certs.StageX",
      "outer")
    .join(
      GetPTTasksFor(location, "S4").as("PT"),
      $"Tasks.SystemX" === $"PT.SystemX" && $"Tasks.StageX" === $"PT.StageX",
      "outer")

  // None of the derived columns depends on another, so a single projection
  // replaces the chain of withColumn calls followed by select.
  joined.select(
    coalesce(col("Tasks.SystemX"), col("Certs.SystemX"), col("PT.SystemX")).as("SystemizationId"),
    coalesce(col("Tasks.StageX"), col("Certs.StageX"), col("PT.StageX")).as("CommissioningStage"),
    when(hasNoPT, closedCount("Tasks") * 0.9 + closedCount("Certs") * 0.1)
      .otherwise(closedCount("Tasks") * 0.6 + closedCount("PT") * 0.3 + closedCount("Certs") * 0.1)
      .as("fPercentageClosed"),
    // least() skips nulls and yields null only when every argument is null,
    // which is exactly what the former nested when/otherwise tree computed.
    least(
      col("Tasks.ActualStartDateX"),
      col("Certs.ActualStartDateX"),
      col("PT.ActualStartDateX")).as("fActualStartDate"),
    // greatest() also skips nulls, so the not-null guards are what keep the end
    // date null until every participating source has finished. A when() without
    // otherwise() evaluates to null when its condition is false.
    when(hasNoPT, when(tasksAndCertsEnded, greatest(tasksEnd, certsEnd)))
      .otherwise(when(tasksAndCertsEnded && ptEnd.isNotNull, greatest(tasksEnd, certsEnd, ptEnd)))
      .as("fActualEndDate")
  )
}
我如何优化它？