
Java Spark DirectStream issue

Tags: java, apache-spark, apache-kafka, spark-streaming

I am trying to create a Spark direct stream from Kafka, but when creating the directStream object I get the following error:

The method createDirectStream in the type KafkaUtils is not applicable for the arguments (one of which is the HashMap I am passing)

on this line:

JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc, String.class,
        String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

Full code:

package kafkatest2;



import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.commons.codec.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.Duration; 
import org.apache.spark.streaming.kafka010.*;
public class SparkStream {

    public static void main(String[] args) {

        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

        // TODO: processing pipeline
        Map<String, String> kafkaParams = new HashMap<String, String>();
        kafkaParams.put("metadata.broker.list", "localhost:9092");

        Set<String> topics = Collections.singleton("topic5");

        JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc, String.class,
                String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);


        directKafkaStream.foreachRDD(rdd -> {
            System.out.println("--- New RDD with " + rdd.partitions().size()
                    + " partitions and " + rdd.count() + " records");
            rdd.foreach(record -> System.out.println(record._2));
        });
        ssc.start();
        ssc.awaitTermination();
    }
}

In your code, the wrong StringDecoder is used. It should be kafka.serializer.StringDecoder, not org.apache.commons.codec.StringDecoder.

The correct code is as follows:

package kafkatest2;



import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
public class SparkStream {

    public static void main(String[] args) {

        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

        // TODO: processing pipeline
        Map<String, String> kafkaParams = new HashMap<String, String>();
        kafkaParams.put("metadata.broker.list", "localhost:9092");

        Set<String> topics = Collections.singleton("topic5");

        // Kafka 0.8-style direct stream: the decoder classes come from kafka.serializer
        JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc, String.class,
                String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);


        directKafkaStream.foreachRDD(rdd -> {
            System.out.println("--- New RDD with " + rdd.partitions().size()
                    + " partitions and " + rdd.count() + " records");
            rdd.foreach(record -> System.out.println(record._2));
        });
        ssc.start();
        ssc.awaitTermination();
    }
}

I hope that helps.

What versions of Spark and Kafka are you using? The Spark version is 2.2.0 and K
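
For what it's worth, the decoder-based createDirectStream overload used above belongs to the older Kafka 0.8 integration (the spark-streaming-kafka-0-8 artifact). Since Spark 2.2.0 is mentioned and the original imports already reference org.apache.spark.streaming.kafka010.*, the newer spark-streaming-kafka-0-10 integration is another option; it replaces the decoder classes with consumer properties plus LocationStrategies and ConsumerStrategies. Below is a minimal sketch, assuming the spark-streaming-kafka-0-10 dependency is on the classpath (the class name and group.id are made up for illustration):

package kafkatest2;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public class SparkStreamKafka010 {

    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

        // Consumer properties replace the decoder classes of the 0.8 API; group.id is a placeholder.
        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "kafka-sandbox-group");
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        // The 0-10 API yields ConsumerRecord objects, so values are read with record.value() instead of record._2.
        JavaInputDStream<ConsumerRecord<String, String>> stream =
                KafkaUtils.createDirectStream(
                        ssc,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(
                                Collections.singleton("topic5"), kafkaParams));

        stream.foreachRDD(rdd -> rdd.foreach(record ->
                System.out.println(record.key() + " -> " + record.value())));

        ssc.start();
        ssc.awaitTermination();
    }
}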