Scala Spark unit testing: should I break up the transformation chain?


This is my transform function and I want to test it. Should I break (move) each step of the transformation chain out into separate functions and test them individually, test the whole function at once, or both?

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._

private def transform[T](
      spark: SparkSession,
      data: DataFrame
  ) = {
    // Keep only recipes that mention beef, then derive the total cook time
    // and bucket it into a difficulty rating.
    val beefData = data
      .filter(col("ingredients").contains("beef") || col("name").contains("beef"))
      .withColumn("total_cook_time", col("cookTime") + col("prepTime"))
      .withColumn(
        "difficulty",
        when(col("total_cook_time") < 30, "easy")
          .when(col("total_cook_time") between (30, 60), "medium")
          .when(col("total_cook_time") > 60, "hard")
      )

    // Aggregate the average total cook time per difficulty bucket and print it.
    beefData.createOrReplaceTempView("BeefRecipes")
    spark
      .sql(
        "select difficulty, avg(total_cook_time) as avg_total_cooking_time from BeefRecipes group by difficulty"
      )
      .show()

  }
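For context, one common way to make this testable (a sketch, not from the original post) is to extract the filtering/derivation step into a pure DataFrame => DataFrame function and assert on its output, leaving the SQL aggregation and show() in a thin caller. The names RecipeTransforms and buildBeefData below are hypothetical, and the test assumes ScalaTest's AnyFunSuite with a local SparkSession:

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._
import org.scalatest.funsuite.AnyFunSuite

object RecipeTransforms {
  // Hypothetical refactor: the transformation chain extracted into a pure
  // function so it can be unit-tested without any side effects.
  def buildBeefData(data: DataFrame): DataFrame =
    data
      .filter(col("ingredients").contains("beef") || col("name").contains("beef"))
      .withColumn("total_cook_time", col("cookTime") + col("prepTime"))
      .withColumn(
        "difficulty",
        when(col("total_cook_time") < 30, "easy")
          .when(col("total_cook_time") between (30, 60), "medium")
          .when(col("total_cook_time") > 60, "hard")
      )
}

class RecipeTransformsSuite extends AnyFunSuite {
  private lazy val spark: SparkSession =
    SparkSession.builder().master("local[*]").appName("test").getOrCreate()

  test("beef recipes are classified by total cook time") {
    import spark.implicits._
    // Hypothetical test data: one beef recipe (45 + 30 = 75 minutes -> hard)
    // and one non-beef recipe that should be filtered out.
    val input = Seq(
      ("beef stew", "beef, carrots", 45, 30),
      ("salad", "lettuce", 5, 5)
    ).toDF("name", "ingredients", "cookTime", "prepTime")

    val result = RecipeTransforms.buildBeefData(input).collect()

    assert(result.length == 1)
    assert(result.head.getAs[String]("difficulty") == "hard")
  }
}

With this split, the question becomes less pressing: the chain stays intact inside buildBeefData, yet both the individual rules (via targeted input rows) and the function as a whole can be exercised by the same kind of test.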