Scala 火花窗函数中未计算右值
我有下表 我需要转换usign Spark scala窗口函数,如下所示 使用的代码在这里。我无法获取所需的AssignmentId值Scala 火花窗函数中未计算右值,scala,apache-spark,Scala,Apache Spark,我有下表 我需要转换usign Spark scala窗口函数,如下所示 使用的代码在这里。我无法获取所需的AssignmentId值 import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql._ import org.apache.spark.sql.types._ val data = Seq( Row("1"
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.spark.sql._
import org.apache.spark.sql.types._
val data = Seq(
Row("1","E19819","1","2019-11-03","A6"),
Row("2","E19819","1","2019-11-02","A4"),
Row("3","E19819","1","2019-11-01","A3"),
Row("4","E19819","0","2019-11-01","A1"),
Row("5","E19819","0","2019-11-01","A2"),
Row("6","E19819","1","2019-10-28","A5")
)
val schema = StructType(
List(
StructField("UniqueId", StringType, true),
StructField("CustomerId", StringType, true),
StructField("Backoffice", StringType, true),
StructField("TxDateBase", StringType, true),
StructField("AssignmtId_orig", StringType, true)
)
)
val dfCustData = spark.createDataFrame(
spark.sparkContext.parallelize(data),
schema
).
withColumn("TxDate", to_date(col("TxDateBase")))
.withColumn("AssignmtId", when ( col("Backoffice")===1, col("AssignmtId_orig")).otherwise(null))
.drop("TxDateBase")
val cols = Seq("AssignmtId")
val w1 = Window
.partitionBy("CustomerId")
.orderBy($"TxDate",$"Backoffice".desc,$"UniqueId".desc)
.rangeBetween(Window.unboundedPreceding, Window.currentRow)
val w2 = Window
.partitionBy("CustomerId")
.orderBy($"TxDate",$"Backoffice".desc,$"UniqueId".desc)
.rangeBetween(Window.currentRow, Window.unboundedFollowing)
val dfCustTransformedData = cols .foldLeft(dfCustData)((dfCustupdated, columnName) =>
dfCustupdated.withColumn(columnName,
coalesce(col(columnName),
first(columnName, ignoreNulls = true).over(w2),
last(columnName, ignoreNulls = true).over(w1)
))
)
dfCustTransformedData.orderBy($"TxDate".desc,$"Backoffice".desc, $"uniqueId").show()
对于uniqueid为4和5的行,AssignmentId应设置为A3。现在它被设置为A4请使用编程创建的dataframeRoth(添加的可执行代码)发布一个可复制的示例