
Error retrieving data from Cassandra with the Apache Spark connector in Java


I am having a problem retrieving data from a Cassandra table using the Spark Cassandra connector. I created a keyspace "ks" and a table "student" in Cassandra. The table looks like this:

 id | name
----+-----------
 10 | Catherine
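
The question does not show the DDL; a minimal CQL sketch that would produce a keyspace and table like this (the replication settings here are assumed) is:

CREATE KEYSPACE ks WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};

CREATE TABLE ks.student (
    id int PRIMARY KEY,
    name text
);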

I start Spark locally by running start-all.sh.

Then I created the class "SparkCassandraConnector", which connects Spark and Cassandra. What I am trying to do is retrieve the data from the student table and print it to the screen.

The error I get is "java.lang.ClassNotFoundException: SparkCassandraConnector$Student":

java.net.URLClassLoader$1.run(URLClassLoader.java:372)
java.net.URLClassLoader$1.run(URLClassLoader.java:361)
java.security.AccessController.doPrivileged(Native Method)
java.net.URLClassLoader.findClass(URLClassLoader.java:360)
java.lang.ClassLoader.loadClass(ClassLoader.java:424)
java.lang.ClassLoader.loadClass(ClassLoader.java:357)
java.lang.Class.forName0(Native Method)
java.lang.Class.forName(Class.java:340)

Here is my program:

import org.apache.commons.lang.StringUtils;

import org.apache.spark.SparkConf;

import org.apache.spark.api.java.JavaRDD;

import org.apache.spark.api.java.JavaSparkContext;

import java.io.Serializable;

import static com.datastax.spark.connector.CassandraJavaUtil.javaFunctions;

public class SparkCassandraConnector implements Serializable {
    public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("Simple Application");

    conf.setMaster("spark://127.0.0.1:7077");
    conf.set("spark.cassandra.connection.host", "127.0.0.1");
    String[] jars = new String[10];
    jars[0] = "~/.m2/repository/com/datastax/spark/spark-cassandra-connector-java_2.10/1.1.0-alpha4/spark-cassandra-connector-java_2.10-1.1.0-alpha4.jar";
    jars[1] = "~/.m2/repository/com/datastax/cassandra/cassandra-driver-core/2.1.0/cassandra-driver-core-2.1.0.jar";
    jars[3] = "~/.m2/repository/com/datastax/spark/spark-cassandra-connector_2.10/1.1.0-alpha4/spark-cassandra-connector_2.10-1.1.0-alpha4.jar";
    jars[4] = "~/.m2/repository/com/datastax/cassandra/cassandra-driver-core/2.1.0/cassandra-driver-core-2.1.0.jar";
    jars[5] = "~/.m2/repository/org/apache/cassandra/cassandra-thrift/2.1.0/cassandra-thrift-2.1.0.jar";
    jars[6] = "~/.m2/repository/org/apache/cassandra/cassandra-clientutil/2.1.0/cassandra-clientutil-2.1.0.jar";
    conf = conf.setJars(jars);
    JavaSparkContext sc = new JavaSparkContext(conf);

    JavaRDD<String> rdd = javaFunctions(sc).cassandraTable("ks", "student", Student.class)
            .map(new org.apache.spark.api.java.function.Function<Student, String>() {
                @Override
                public String call(Student person) throws Exception {
                    return person.toString();
                }
            });
    System.out.println("Data as Person beans: \n" + StringUtils.join(rdd.collect(), "\n"));
}
public static class Student implements  Serializable{

    private Integer id;
    private String name;

    public Student(){

    }
    public Student(Integer id, String name) {
        this.id = id;
        this.name = name;
    }

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }
    }
}

Here is my POM file:

<dependencies>


    <!--Spark-->

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.10</artifactId>
        <version>1.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.10</artifactId>
        <version>1.1.0</version>
    </dependency>
    <dependency>
        <groupId>com.datastax.cassandra</groupId>
        <artifactId>cassandra-driver-core</artifactId>
        <version>2.1.0</version>
    </dependency>
    <dependency>
        <groupId>com.datastax.spark</groupId>
        <artifactId>spark-cassandra-connector_2.10</artifactId>
        <version>1.1.0-alpha4</version>
    </dependency>

    <dependency>
        <groupId>com.datastax.spark</groupId>
        <artifactId>spark-cassandra-connector-java_2.10</artifactId>
        <version>1.1.0-alpha4</version>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-catalyst_2.10</artifactId>
        <version>1.0.0</version>
    </dependency>
</dependencies>


In the jars you provide, the jar that contains your job is missing, and with it Student.class. The quick fix is to add the jar from your project's /target folder to that list.
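
A minimal sketch of that quick fix, assuming the project builds to target/spark-cassandra-demo-1.0.jar (the artifact name here is hypothetical; use your project's actual build output):

    String[] jars = new String[] {
            // The application's own jar, which contains SparkCassandraConnector$Student.
            "target/spark-cassandra-demo-1.0.jar",
            "~/.m2/repository/com/datastax/spark/spark-cassandra-connector-java_2.10/1.1.0-alpha4/spark-cassandra-connector-java_2.10-1.1.0-alpha4.jar",
            "~/.m2/repository/com/datastax/spark/spark-cassandra-connector_2.10/1.1.0-alpha4/spark-cassandra-connector_2.10-1.1.0-alpha4.jar",
            "~/.m2/repository/com/datastax/cassandra/cassandra-driver-core/2.1.0/cassandra-driver-core-2.1.0.jar",
            "~/.m2/repository/org/apache/cassandra/cassandra-thrift/2.1.0/cassandra-thrift-2.1.0.jar",
            "~/.m2/repository/org/apache/cassandra/cassandra-clientutil/2.1.0/cassandra-clientutil-2.1.0.jar"
    };
    conf.setJars(jars);

Note that the JVM does not expand "~"; absolute paths are safer here.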

Another approach is to package your job and all of its dependencies into an "uber jar", and use that uber jar as the only declared jar.
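
One common way to build such an uber jar with the Maven POM above is the maven-shade-plugin; the configuration below is a sketch, not the answerer's exact setup:

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

Running mvn package then produces a shaded jar under /target that bundles the connector and driver classes together with your own.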


You can also provide the jars from the command line with the spark-submit --jars option.
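
For example (the --jars list is comma-separated; the paths and jar names here are illustrative):

    ./bin/spark-submit --class SparkCassandraConnector \
        --master spark://127.0.0.1:7077 \
        --jars /path/to/spark-cassandra-connector_2.10-1.1.0-alpha4.jar,/path/to/cassandra-driver-core-2.1.0.jar \
        target/spark-cassandra-demo-1.0.jar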

I suggest you go with the assembly plugin; you can read about its motivation and an sbt example here.

Thank you for the link. But do you agree that I provided the necessary jar files and the error should not occur? I tried adding my project's jar (the first suggested solution), but it did not work. Then I tried spark-submit --jars, but it failed with an unrecognized --jars option. Finally I created the uber jar, and that solution worked. Thanks to @maasg.