Scala Spark 单元测试:应该拆开转换链吗?(scala, apache-spark)

下面是我的转换函数,我想为它编写单元测试。我应该把转换链中的每一步拆分(移动)为单独的函数并分别测试,还是只对整个函数做测试,还是两者都做?
/**
 * Filters the input recipes to "beef" recipes, derives a total cooking time
 * and a difficulty rating, then prints the average total cooking time per
 * difficulty bucket to stdout.
 *
 * NOTE(review): the type parameter `T` is never used in the body; it is kept
 * only so any existing caller that passes an explicit type argument keeps
 * compiling.
 *
 * @param spark active session, used to run the aggregation SQL against the
 *              temp view registered below
 * @param data  recipe DataFrame — assumes columns `ingredients`, `name`,
 *              `cookTime`, `prepTime` exist (TODO confirm with callers)
 */
private def transform[T](
    spark: SparkSession,
    data: DataFrame
): Unit = {
  // Pure transformation step extracted so it can be unit-tested in isolation
  // without triggering the side-effecting show() action below.
  val beefData = beefRecipesWithDifficulty(data)

  // Side-effecting action part: register a temp view, aggregate, print.
  beefData.createOrReplaceTempView("BeefRecipes")
  spark
    .sql(
      "select difficulty, avg(total_cook_time) as avg_total_cooking_time from BeefRecipes group by difficulty"
    )
    .show()
}

/**
 * Pure transformation: keeps only recipes mentioning "beef" (in ingredients
 * or name), adds `total_cook_time` = cookTime + prepTime, and buckets it into
 * a `difficulty` column. Returns a new DataFrame; no actions are executed.
 */
private def beefRecipesWithDifficulty(data: DataFrame): DataFrame =
  data
    .filter(col("ingredients").contains("beef") || col("name").contains("beef"))
    .withColumn("total_cook_time", col("cookTime") + col("prepTime"))
    .withColumn(
      "difficulty",
      when(col("total_cook_time") < 30, "easy")
        .when(col("total_cook_time") between (30, 60), "medium")
        .when(col("total_cook_time") > 60, "hard")
      // NOTE(review): no .otherwise(...) — rows where total_cook_time is null
      // (null cookTime or prepTime) get a null difficulty and fall out of the
      // grouped averages. Add .otherwise("unknown") if that is unintended.
    )
private def transform[T](
    spark: SparkSession,
    data: DataFrame
) = {
  val beefData = data
    .filter(col("ingredients").contains("beef") || col("name").contains("beef"))
    .withColumn("total_cook_time", col("cookTime") + col("prepTime"))
    .withColumn(
      "difficulty",
      when(col("total_cook_time") < 30, "easy")
        .when(col("total_cook_time") between (30, 60), "medium")
        .when(col("total_cook_time") > 60, "hard")
    )
  beefData.createOrReplaceTempView("BeefRecipes")
  spark
    .sql(
      "select difficulty, avg(total_cook_time) as avg_total_cooking_time from BeefRecipes group by difficulty"
    )
    .show()
}