
RDD not serializable with the Cassandra/Spark connector Java API


So I previously asked some questions about how to query Cassandra using Spark in a Java Maven project:

My question was answered and the fix worked, but I have run into a (possible) problem. I am now trying to use the DataStax Java API. Here is my code:

package com.angel.testspark.test2;

import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import java.io.Serializable;

import static com.datastax.spark.connector.CassandraJavaUtil.*;


public class App 
{

    // firstly, we define a bean class
    public static class Person implements Serializable {
        private Integer id;
        private String fname;
        private String lname;
        private String role;

        // Remember to declare no-args constructor
        public Person() { }

        public Integer getId() { return id; }
        public void setId(Integer id) { this.id = id; }

        public String getfname() { return fname; }
        public void setfname(String fname) { this.fname = fname; }

        public String getlname() { return lname; }
        public void setlname(String lname) { this.lname = lname; }

        public String getrole() { return role; }
        public void setrole(String role) { this.role = role; }

        // other methods, constructors, etc.
    }

    private transient SparkConf conf;
    private App(SparkConf conf) {
        this.conf = conf;
    }


    private void run() {
        JavaSparkContext sc = new JavaSparkContext(conf);
        createSchema(sc);


        sc.stop();
    }

    private void createSchema(JavaSparkContext sc) {

        JavaRDD<String> rdd = javaFunctions(sc).cassandraTable("tester", "empbyrole", Person.class)
                .where("role=?", "IT Engineer").map(new Function<Person, String>() {
                    @Override
                    public String call(Person person) throws Exception {
                        return person.toString();
                    }
                });
        System.out.println("Data as Person beans: \n" + StringUtils.join("\n", rdd.toArray()));
               }



    public static void main( String[] args )
    {
        if (args.length != 2) {
            System.err.println("Syntax: com.datastax.spark.demo.JavaDemo <Spark Master URL> <Cassandra contact point>");
            System.exit(1);
        }

        SparkConf conf = new SparkConf();
        conf.setAppName("Java API demo");
        conf.setMaster(args[0]);
        conf.set("spark.cassandra.connection.host", args[1]);

        App app = new App(conf);
        app.run();
    }
}
Now I know where my error is. It is

System.out.println("Data as Person beans: \n" + StringUtils.join("\n", rdd.toArray()));

because I need to convert the RDD to an array. However, the API documentation says I should be able to do this... that line is copied and pasted straight from the documentation. Why can't I serialize the RDD to an array?
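As an aside, toArray() on JavaRDD was deprecated in later Spark releases in favor of collect(), which returns a java.util.List. A minimal sketch of the same print using collect(), reusing the rdd variable from createSchema above (this materializes the whole result on the driver, so it is only suitable for small result sets):

// Sketch: collect the RDD on the driver as a List instead of an array.
java.util.List<String> rows = rdd.collect();
System.out.println("Data as Person beans:\n" + StringUtils.join(rows, "\n"));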

I inserted dummy data into my Cassandra instance using the INSERT statements included in the post linked above.

Also, one error I fixed earlier on my own: I changed all of my getters and setters to lowercase. When I used capital letters in them, it produced an error. Why can't I use capital letters in my getters and setters?
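For comparison, this is what the conventional camel-cased JavaBeans spelling would look like for one field (a hypothetical variant of the Person bean above; under standard JavaBeans introspection rules getFname/setFname still expose a property named fname, but whether this connector version accepts that spelling depends on how it maps column names to accessors, which I cannot confirm here):

import java.io.Serializable;

// Hypothetical sketch only: the camel-cased JavaBeans spelling of one
// Person field. getFname()/setFname() expose a property named "fname"
// under java.beans.Introspector naming rules.
public class PersonCamel implements Serializable {
    private String fname;

    public String getFname() { return fname; }
    public void setFname(String fname) { this.fname = fname; }
}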

Thanks,
Angel

Changing

public class App

to

public class App implements Serializable

should fix the error. Because a Java inner class keeps a reference to its enclosing class, your Function object will hold a reference to App. Since Spark needs to serialize your Function object, it requires that App be serializable as well.
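Concretely, the fix is a one-line change to the class declaration; everything else in the question's code stays the same:

public class App implements Serializable
{
    // ... Person bean, run(), createSchema(), main() unchanged ...
}

An alternative, if you would rather not make App serializable, is to move the anonymous Function into its own static nested class marked Serializable, so it no longer captures a reference to the enclosing App instance.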

Thanks! That worked. Do you have any insight into why this line doesn't work, though?

JavaRDD<String> rdd = javaFunctions(sc).cassandraTable("tester", "empbyrole", Person.class).where("role=?", "IT Engineer").map(new Function<Person, String>() ...

The whole thing works if I remove the .where() but keep the .map(). The documentation says .where() should work. I posted this as another question, and it has since been answered. Thanks!
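If anyone else hits the same thing, one way to narrow it down is to apply the predicate on the Spark side instead of pushing it to Cassandra; a diagnostic sketch reusing the setup from the question (it scans the whole table, so it is not a substitute for a working where()):

JavaRDD<String> rdd = javaFunctions(sc)
        .cassandraTable("tester", "empbyrole", Person.class)
        .filter(new Function<Person, Boolean>() {
            @Override
            public Boolean call(Person person) throws Exception {
                // Spark-side predicate instead of a Cassandra-side where()
                return "IT Engineer".equals(person.getrole());
            }
        })
        .map(new Function<Person, String>() {
            @Override
            public String call(Person person) throws Exception {
                return person.toString();
            }
        });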
For reference, the original error was:

Exception in thread "main" org.apache.spark.SparkException: Job aborted: Task not serializable: java.io.NotSerializableException: com.angel.testspark.test2.App
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1020)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1018)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$abortStage(DAGScheduler.scala:1018)
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:781)
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:724)
    at org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.scala:554)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$start$1$$anon$2$$anonfun$receive$1.applyOrElse(DAGScheduler.scala:190)
    at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
    at akka.actor.ActorCell.invoke(ActorCell.scala:456)
    at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
    at akka.dispatch.Mailbox.run(Mailbox.scala:219)
    at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
    at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
    at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
    at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
    at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)