How can I fix the error java.io.NotSerializableException when processing a DataFrame?


The code and the resulting log are shown below:

import java.net.URLEncoder
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.{col, udf}

def URLEnc(input: String): String = {
  URLEncoder.encode(input, "UTF-8")
}

val URLEncUDF: UserDefinedFunction = udf(URLEnc(_: String))

// file is read from an XML source
val file = spark.read.format("xml")
  .option("rootTag", "channel")
  .option("rowTag", "item")
  .load("path")

val file1 = file.withColumn("description", URLEncUDF(col("g:description")))
Exception in thread "main" org.apache.spark.SparkException: Task not serializable
    at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:416)
    at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:406)
    at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:162)
    at org.apache.spark.SparkContext.clean(SparkContext.scala:2362)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$1(RDD.scala:886)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:388)
    at org.apache.spark.rdd.RDD.mapPartitionsWithIndex(RDD.scala:885)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:723)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
    at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:316)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:434)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:420)
    at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47)
    at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3627)
    at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2697)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3618)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616)
    at org.apache.spark.sql.Dataset.head(Dataset.scala:2697)
    at org.apache.spark.sql.Dataset.take(Dataset.scala:2904)
    at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300)
    at org.apache.spark.sql.Dataset.showString(Dataset.scala:337)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:826)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:803)
    at AIFeed.<init>(AIFeed.scala:16)
    at AIFeed$.delayedEndpoint$AIFeed$1(AIFeed.scala:113)
    at AIFeed$delayedInit$body.apply(AIFeed.scala:112)
    at scala.Function0.apply$mcV$sp(Function0.scala:39)
    at scala.Function0.apply$mcV$sp$(Function0.scala:39)
    at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:17)
    at scala.App.$anonfun$main$1$adapted(App.scala:80)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at scala.App.main(App.scala:80)
    at scala.App.main$(App.scala:78)
    at AIFeed$.main(AIFeed.scala:112)
    at AIFeed.main(AIFeed.scala)
Caused by: java.io.NotSerializableException: AIFeed
Serialization stack:
    - object not serializable (class: AIFeed, value: AIFeed@5bccef9f)
    - element of array (index: 0)
    - array (class [Ljava.lang.Object;, size 1)
    - field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;)
    - object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class FeedFunction, functionalInterfaceMethod=scala/Function1.apply:(Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic FeedFunction.$anonfun$URLEncUDF$1:(LFeedFunction;Ljava/lang/String;)Ljava/lang/String;, instantiatedMethodType=(Ljava/lang/String;)Ljava/lang/String;, numCaptured=1])
    - writeReplace data (class: java.lang.invoke.SerializedLambda)
    - object (class FeedFunction$$Lambda$275/1443173326, FeedFunction$$Lambda$275/1443173326@51e94b7d)
    - element of array (index: 5)
    - array (class [Ljava.lang.Object;, size 6)
    - element of array (index: 1)
    - array (class [Ljava.lang.Object;, size 3)
    - field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;)
    - object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class org.apache.spark.sql.execution.WholeStageCodegenExec, functionalInterfaceMethod=scala/Function2.apply:(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic org/apache/spark/sql/execution/WholeStageCodegenExec.$anonfun$doExecute$4$adapted:(Lorg/apache/spark/sql/catalyst/expressions/codegen/CodeAndComment;[Ljava/lang/Object;Lorg/apache/spark/sql/execution/metric/SQLMetric;Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, instantiatedMethodType=(Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, numCaptured=3])
    - writeReplace data (class: java.lang.invoke.SerializedLambda)
    - object (class org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2116/996471089, org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$2116/996471089@565a6af)
    at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:41)
    at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
    at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101)
    at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:413)
    ... 45 more
20/12/16 17:55:15 INFO BlockManagerInfo: Removed broadcast_1_piece0 on 192.168.1.4:34511 in memory (size: 2.9 KiB, free: 1407.3 MiB)
20/12/16 17:55:15 INFO SparkContext: Invoking stop() from shutdown hook
20/12/16 17:55:15 INFO BlockManagerInfo: Removed broadcast_0_piece0 on 192.168.1.4:34511 in memory (size: 23.7 KiB, free: 1407.3 MiB)
20/12/16 17:55:15 INFO SparkUI: Stopped Spark web UI at http://192.168.1.4:4040
20/12/16 17:55:15 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
20/12/16 17:55:15 INFO MemoryStore: MemoryStore cleared
20/12/16 17:55:15 INFO BlockManager: BlockManager stopped
20/12/16 17:55:15 INFO BlockManagerMaster: BlockManagerMaster stopped
20/12/16 17:55:15 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
20/12/16 17:55:15 INFO SparkContext: Successfully stopped SparkContext
20/12/16 17:55:15 INFO ShutdownHookManager: Shutdown hook called

The UDF is built with udf(URLEnc(_: String)), which eta-expands the method URLEnc into a lambda. Because URLEnc is an instance method, that lambda captures the instance it belongs to (the AIFeed object, as the serialization stack shows), and AIFeed is not serializable, so Spark fails when it tries to ship the closure to the executors. This is the difference between a method and a function value:

def someMeth(a: Int): Int = a + 1   // method: passing it where a function is expected captures `this`
val someFunc = (a: Int) => a + 1    // function value: a standalone object with no reference to `this`
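
A minimal sketch of one way to fix it, assuming the same spark session and the file DataFrame from the question: define the encoder as a function value (val) rather than a method (def), so the UDF's closure contains nothing but the function itself.

import java.net.URLEncoder
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.{col, udf}

// A function value carries no reference to the enclosing (non-serializable) class,
// so Spark can serialize the closure and ship it to the executors.
val urlEnc: String => String = (input: String) => URLEncoder.encode(input, "UTF-8")

val URLEncUDF: UserDefinedFunction = udf(urlEnc)

// Same transformation as in the question; only the UDF definition changed.
val file1 = file.withColumn("description", URLEncUDF(col("g:description")))

Alternatively, keeping the method and making the class that defines it extend Serializable (or moving URLEnc into a standalone serializable object) also lets Spark serialize the captured instance.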