Java: "main" org.apache.spark.SparkException: Task not serializable

I am trying to run the following simple Spark code:

package com.bdg.try.graph_api;

import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.config.WriteConfig;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.SparkSession;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import static java.util.Arrays.asList;

@SpringBootApplication
public class SparkStreamingApplication {

    private final Logger logger = LoggerFactory.getLogger(SparkStreamingApplication.class);

    public static void main(String[] args) throws Exception {
        SpringApplication.run(SparkStreamingApplication.class, args);
        SparkStreamingApplication main = new SparkStreamingApplication();
        main.run();
    }

    private void run() {
        SparkSession spark = SparkSession
                .builder()
                .master("spark://192.168.xx.xx:7077")
                .config("spark.mongodb.input.uri", "mongodb://192.168.xx.xx:27017/database.test_spark")
                .config("spark.database.output.uri", "mongodb://192.168.xx.xx:27017/database.test_spark")
                .config("spark.driver.allowMultipleContexts", "true")
                .appName("SparkTest")
                .getOrCreate();

        JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

        // Create a custom WriteConfig
        Map<String, String> writeOverrides = new HashMap<String, String>();
        writeOverrides.put("collection", "spark");
        writeOverrides.put("writeConcern.w", "majority");
        WriteConfig writeConfig = WriteConfig.create(jsc).withOptions(writeOverrides);

        // Create a RDD of 10 documents
        JavaRDD<Document> sparkDocuments = jsc.parallelize(asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)).map
            (new Function<Integer, Document>() {
                public Document call(final Integer i) throws Exception {
                    return Document.parse("{spark: " + i + "}");
                }
            });

        System.out.println("collection : " + writeOverrides);
        MongoSpark.save(sparkDocuments, writeConfig);
        spark.stop();
    }
}
[UPDATE] Next, I added "implements java.io.Serializable" to the class, but now I get the following error log:

2017-07-07 09:27:19.238 ERROR 4369 --- [ffle-server-3-1] o.a.s.n.server.TransportRequestHandler   : Error while invoking RpcHandler#receive() on RPC id 7048300146537589013
java.lang.ClassNotFoundException: org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages$RetrieveSparkProps$
at java.net.URLClassLoader.findClass(URLClassLoader.java:381) ~[na:1.8.0_131]
at java.lang.ClassLoader.loadClass(ClassLoader.java:424) ~[na:1.8.0_131]
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335) ~[na:1.8.0_131]
at java.lang.ClassLoader.loadClass(ClassLoader.java:357) ~[na:1.8.0_131]
at java.lang.Class.forName0(Native Method) ~[na:1.8.0_131]
at java.lang.Class.forName(Class.java:348) ~[na:1.8.0_131]
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67) ~[spark-core_2.11-2.1.0.jar:2.1.0]
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1826) ~[na:1.8.0_131]
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1713) ~[na:1.8.0_131]
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2000) ~[na:1.8.0_131]
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535) ~[na:1.8.0_131]
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245) ~[na:1.8.0_131]
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169) ~[na:1.8.0_131]
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027) ~[na:1.8.0_131]
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535) ~[na:1.8.0_131]
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422) ~[na:1.8.0_131]
Here, jsc is the JavaSparkContext object I am using. As far as I know, JavaSparkContext is not a serializable object, and it should not be used in any function that is sent to Spark workers.


What I cannot understand is how an instance of JavaSparkContext ends up being sent to the workers at all. What should I change in my code to avoid this?

You need to collect sparkDocuments to the driver node. Since you have not collected them, the map function is executed on the worker nodes. A similar question has been asked here; the underlying problem is that the anonymous class you defined is not serializable.
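A minimal sketch of the usual fix, assuming Java 8 and the same MongoDB Spark connector API as in the question: the anonymous Function is an inner class, so it keeps a hidden reference to the enclosing SparkStreamingApplication instance, and Spark then tries to serialize that whole instance with the task. Replacing it with a lambda, or with a static nested class (the name ToDocument below is made up for illustration), removes that hidden reference.

// Sketch only, assuming Java 8+: a lambda does not capture the enclosing "this"
// unless it actually uses it, so only the small serializable function is shipped
// to the workers. The rest of run() stays exactly as in the question.
JavaRDD<Document> sparkDocuments = jsc
        .parallelize(asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
        .map(i -> Document.parse("{spark: " + i + "}"));

// Pre-Java-8 alternative: declare a static nested class inside
// SparkStreamingApplication. A static nested class holds no reference to the
// outer instance, and org.apache.spark.api.java.function.Function already
// extends java.io.Serializable.
static class ToDocument implements Function<Integer, Document> {
    @Override
    public Document call(final Integer i) {
        return Document.parse("{spark: " + i + "}");
    }
}

// ...and use it as:
// JavaRDD<Document> sparkDocuments =
//         jsc.parallelize(asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)).map(new ToDocument());

Either variant keeps the non-serializable outer class out of the task closure. As a side note, the spark.database.output.uri key in the question looks like a typo for spark.mongodb.output.uri, which is the output key the MongoDB Spark connector reads when WriteConfig.create(jsc) builds its configuration.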