Scala org.apache.spark.SparkException:使用lambda时任务不可序列化(Task not serializable)

Scala org.apache.spark.SparkException:无法使用lambda序列化任务,scala,apache-spark,serialization,Scala,Apache Spark,Serialization,我对scala和spark很陌生。现在我遇到了一个让我很困惑的问题。请给我一个建议 我正在使用lambda从RDD[Array[String]制作RDD[myEntityClass]。但我遇到了一个错误,它说有空值可以将字符串解析为Long。为了研究这个问题,我实现了一个方法,使我能够使用断点 但是现在我得到了org.apache.spark.SparkException:Task not serializable,我找不到问题所在。下面是我的代码片段,如果你能找到任何东西,请帮助我 def m

我对scala和spark很陌生。现在我遇到了一个让我很困惑的问题。请给我一个建议

我正在使用lambda从
RDD[Array[String]]
制作
RDD[myEntityClass]
。但我遇到了一个错误,提示在将字符串解析为Long时遇到了空值。为了排查这个问题,我把转换逻辑提取成了一个单独的方法,以便可以设置断点进行调试

但是现在我得到了
org.apache.spark.SparkException:Task not serializable
,我找不到问题所在。下面是我的代码片段,如果你能找到任何东西,请帮助我

// Builds an RDD[MyEntityClass] by mapping every row of `data` through toMyEntityClass.
// The two `.` lines are code elided by the question's author; `data` is defined there.
// NOTE(review): the lambda calls toMyEntityClass as a method, so it presumably captures
// the enclosing instance (the `$outer` field in the attached serialization stack is
// consistent with that) — confirm against the full class before relying on this.
def makingData() : RDD[MyEntityClass] = {
  .
  .
  data.map(row => toMyEntityClass(row))
}

/**
 * Converts one raw row into a [[MyEntityClass]].
 *
 * Column layout read by this method: 0 = id, 1 = name, 2 = code, 3 = parentId, 4 = status.
 *
 * @param row the raw columns; at least five elements are read
 * @return the entity built from columns 0 to 4
 * @throws NumberFormatException if column 0, 2 or 3 cannot be parsed as a Long
 * @throws ArrayIndexOutOfBoundsException if the row has fewer than five columns
 */
def toMyEntityClass(row : Array[String]) : MyEntityClass = {
  // Immutable bindings: none of these values is reassigned, so `val` replaces `var`.
  // Columns are still read in index order, so the first failing column is unchanged.
  val id = row(0).toLong
  val name = row(1)
  val code = row(2).toLong
  val parentId = row(3).toLong
  val status = row(4)

  MyEntityClass(id, name, code, parentId, status)
}
====最新问题=====

根据大家的建议,我在此更新我的问题。MyEntityClass已经定义为样例类(case class),如下所示

/**
 * Immutable entity record with five fields.
 *
 * Declared `final` so that no other class can extend this case class.
 *
 * @param id       numeric identifier
 * @param name     name value
 * @param code     numeric code
 * @param parentId numeric parent identifier
 * @param status   status value
 */
final case class MyEntityClass(
  id: Long,
  name: String,
  code: Long,
  parentId: Long,
  status: String
)
====附加的堆栈跟踪=====

Task not serializable
org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:304)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:294)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2030)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:314)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:313)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.map(RDD.scala:313)
at com.myproject.repository.MyRepositorySpec.getDummyData(MyRepositorySpec.scala:40)
at com.myproject.repository.MyRepositorySpec$$anonfun$3.apply(MyRepositorySpec.scala:66)
at com.myproject.repository.MyRepositorySpec$$anonfun$3.apply(MyRepositorySpec.scala:65)
at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FlatSpecLike$$anon$1.apply(FlatSpecLike.scala:1681)
at org.scalatest.Suite$class.withFixture(Suite.scala:1031)
at org.scalatest.FlatSpec.withFixture(FlatSpec.scala:1691)
at org.scalatest.FlatSpecLike$class.invokeWithFixture$1(FlatSpecLike.scala:1678)
at org.scalatest.FlatSpecLike$$anonfun$runTest$1.apply(FlatSpecLike.scala:1690)
at org.scalatest.FlatSpecLike$$anonfun$runTest$1.apply(FlatSpecLike.scala:1690)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:287)
at org.scalatest.FlatSpecLike$class.runTest(FlatSpecLike.scala:1690)
at org.scalatest.FlatSpec.runTest(FlatSpec.scala:1691)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1748)
at org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1748)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:394)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:382)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:382)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:371)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:408)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:382)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:382)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:377)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:459)
at org.scalatest.FlatSpecLike$class.runTests(FlatSpecLike.scala:1748)
at org.scalatest.FlatSpec.runTests(FlatSpec.scala:1691)
at org.scalatest.Suite$class.run(Suite.scala:1320)
at org.scalatest.FlatSpec.org$scalatest$FlatSpecLike$$super$run(FlatSpec.scala:1691)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1794)
at org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1794)
at org.scalatest.SuperEngine.runImpl(Engine.scala:519)
at org.scalatest.FlatSpecLike$class.run(FlatSpecLike.scala:1794)
at org.scalatest.FlatSpec.run(FlatSpec.scala:1691)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:46)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1334)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500)
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
at org.scalatest.tools.Runner$.run(Runner.scala:850)
at org.scalatest.tools.Runner.run(Runner.scala)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:138)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
Caused by: java.io.NotSerializableException: org.scalatest.Assertions$AssertionsHelper
Serialization stack:
- object not serializable (class: org.scalatest.Assertions$AssertionsHelper, value: org.scalatest.Assertions$AssertionsHelper@45e639ee)
- field (class: org.scalatest.FlatSpec, name: assertionsHelper, type: class org.scalatest.Assertions$AssertionsHelper)
- object (class com.myproject.repository.MyRepositorySpec, MyRepositorySpec)
- field (class: com.myproject.repository.MyRepositorySpec$$anonfun$getDummyData$1, name: $outer, type: class com.myproject.repository.MyRepositorySpec)
- object (class com.myproject.repository.MyRepositorySpec$$anonfun$getDummyData$1, <function1>)
at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:84)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:301)
... 61 more
任务不可序列化
org.apache.spark.SparkException: 任务不可序列化
位于 org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:304)
位于 org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:294)
位于 org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122)
位于 org.apache.spark.SparkContext.clean(SparkContext.scala:2030)
位于 org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:314)
位于 org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:313)
位于 org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
位于 org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
位于 org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
位于 org.apache.spark.rdd.RDD.map(RDD.scala:313)
位于 com.myproject.repository.MyRepositorySpec.getDummyData(MyRepositorySpec.scala:40)
位于 com.myproject.repository.MyRepositorySpec$$anonfun$3.apply(MyRepositorySpec.scala:66)
位于 com.myproject.repository.MyRepositorySpec$$anonfun$3.apply(MyRepositorySpec.scala:65)
位于 org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
位于 org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
位于 org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
位于 org.scalatest.Transformer.apply(Transformer.scala:22)
位于 org.scalatest.Transformer.apply(Transformer.scala:20)
位于 org.scalatest.FlatSpecLike$$anon$1.apply(FlatSpecLike.scala:1681)
位于 org.scalatest.Suite$class.withFixture(Suite.scala:1031)
位于 org.scalatest.FlatSpec.withFixture(FlatSpec.scala:1691)
位于 org.scalatest.FlatSpecLike$class.invokeWithFixture$1(FlatSpecLike.scala:1678)
位于 org.scalatest.FlatSpecLike$$anonfun$runTest$1.apply(FlatSpecLike.scala:1690)
位于 org.scalatest.FlatSpecLike$$anonfun$runTest$1.apply(FlatSpecLike.scala:1690)
位于 org.scalatest.SuperEngine.runTestImpl(Engine.scala:287)
位于 org.scalatest.FlatSpecLike$class.runTest(FlatSpecLike.scala:1690)
位于 org.scalatest.FlatSpec.runTest(FlatSpec.scala:1691)
位于 org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1748)
位于 org.scalatest.FlatSpecLike$$anonfun$runTests$1.apply(FlatSpecLike.scala:1748)
位于 org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:394)
位于 org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:382)
位于 scala.collection.immutable.List.foreach(List.scala:318)
位于 org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:382)
位于 org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:371)
位于 org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:408)
位于 org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:382)
位于 scala.collection.immutable.List.foreach(List.scala:318)
位于 org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:382)
位于 org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:377)
位于 org.scalatest.SuperEngine.runTestsImpl(Engine.scala:459)
位于 org.scalatest.FlatSpecLike$class.runTests(FlatSpecLike.scala:1748)
位于 org.scalatest.FlatSpec.runTests(FlatSpec.scala:1691)
位于 org.scalatest.Suite$class.run(Suite.scala:1320)
位于 org.scalatest.FlatSpec.org$scalatest$FlatSpecLike$$super$run(FlatSpec.scala:1691)
位于 org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1794)
位于 org.scalatest.FlatSpecLike$$anonfun$run$1.apply(FlatSpecLike.scala:1794)
位于 org.scalatest.SuperEngine.runImpl(Engine.scala:519)
位于 org.scalatest.FlatSpecLike$class.run(FlatSpecLike.scala:1794)
位于 org.scalatest.FlatSpec.run(FlatSpec.scala:1691)
位于 org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:46)
位于 org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
位于 org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1334)
位于 scala.collection.immutable.List.foreach(List.scala:318)
位于 org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334)
位于 org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
位于 org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
位于 org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500)
位于 org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
位于 org.scalatest.tools.Runner$.run(Runner.scala:850)
位于 org.scalatest.tools.Runner.run(Runner.scala)
位于 org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:138)
位于 org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
原因:java.io.NotSerializableException: org.scalatest.Assertions$AssertionsHelper
序列化堆栈:
- 对象不可序列化(类:org.scalatest.Assertions$AssertionsHelper,值:org.scalatest.Assertions$AssertionsHelper@45e639ee)
- 字段(类:org.scalatest.FlatSpec,名称:assertionsHelper,类型:class org.scalatest.Assertions$AssertionsHelper)
- 对象(类 com.myproject.repository.MyRepositorySpec,MyRepositorySpec)
-字段(类:com.myproject.repository.MyRepositorySpec$$anonfun$getDummyData$1,名称:$outer,类型:cl