Java Spark DirectStream 问题
我正试图从 Kafka 创建 Spark Direct Stream，但在创建 directStream 对象时得到如下编译错误：KafkaUtils 类型中的 createDirectStream 方法不适用于（我正在传递的 HashMap 等）参数。出错的行是：JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics); 完整代码如下。（标签：java, apache-spark, apache-kafka, spark-streaming）
package kafkatest2;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.commons.codec.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.kafka010.*;
// NOTE(review): this is the question's original (non-compiling) code. The error
// "createDirectStream ... is not applicable for the arguments" is caused by the
// import of org.apache.commons.codec.StringDecoder above: the Kafka 0.8
// direct-stream API expects kafka.serializer.StringDecoder for the key/value
// decoder class parameters.
public class SparkStream {
public static void main(String[] args) {
// Local Spark context using all available cores.
SparkConf conf = new SparkConf()
.setAppName("kafka-sandbox")
.setMaster("local[*]");
JavaSparkContext sc = new JavaSparkContext(conf);
// Streaming context with a 2000 ms micro-batch interval.
JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));
// TODO: processing pipeline
// Direct-stream (receiverless) connection parameters: broker list only.
Map<String,String> kafkaParams = new HashMap<String,String>();
kafkaParams.put("metadata.broker.list", "localhost:9092");
Set<String> topics = Collections.singleton("topic5");
// Compile error occurs here: StringDecoder.class resolves to the
// commons-codec type, which does not satisfy the decoder bound expected
// by this createDirectStream overload.
JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,String.class,
String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);
// For every micro-batch: print partition/record counts, then each record's value.
directKafkaStream.foreachRDD(rdd -> {
System.out.println("--- New RDD with " + rdd.partitions().size()
+ " partitions and " + rdd.count() + " records");
rdd.foreach(record -> System.out.println(record._2));
});
ssc.start();
ssc.awaitTermination();
}
}
kafkatest2包;
导入java.util.Collections;
导入java.util.HashMap;
导入java.util.Map;
导入java.util.Set;
导入org.apache.commons.codec.StringDecoder;
导入org.apache.spark.SparkConf;
导入org.apache.spark.api.java.JavaSparkContext;
导入org.apache.spark.streaming.Duration;
导入org.apache.spark.streaming.api.java.JavaPairInputStream;
导入org.apache.spark.streaming.api.java.JavaStreamingContext;
导入org.apache.spark.streaming.kafka.KafkaUtils;
导入org.apache.spark.streaming.Duration;
导入org.apache.spark.streaming.kafka010.*;
公共级SparkStream{
公共静态void main(字符串[]args){
SparkConf conf=新的SparkConf()
.setAppName(“卡夫卡沙盒”)
.setMaster(“本地[*]”);
JavaSparkContext sc=新的JavaSparkContext(conf);
JavaStreamingContext ssc=新的JavaStreamingContext(sc,新的持续时间(2000));
//TODO:处理管道
Map kafkaParams=新HashMap();
kafkaParams.put(“metadata.broker.list”,“localhost:9092”);
设置主题=Collections.singleton(“topic5”);
JavaPairInputStream directKafkaStream=KafkaUtils.createDirectStream(ssc,String.class,
String.class、StringDecoder.class、StringDecoder.class、kafkaParams、topics);
directKafkaStream.foreachRDD(rdd->{
System.out.println(“--New RDD with”+RDD.partitions().size()
+“分区和”+rdd.count()+“记录”);
rdd.foreach(记录->系统输出.println(记录._2));
});
ssc.start();
ssc.终止();
}
}
在您的代码中使用了错误的 StringDecoder：应该导入 kafka.serializer.StringDecoder，而不是 org.apache.commons.codec.StringDecoder。
正确的代码如下：
package kafkatest2;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.kafka010.*;
/**
 * Corrected streaming job from the answer: with kafka.serializer.StringDecoder
 * imported, the Kafka 0.8 createDirectStream overload resolves correctly.
 */
public class SparkStream {
public static void main(String[] args) {
// Run Spark locally on all cores; the app name is shown in the Spark UI.
SparkConf sparkConf = new SparkConf().setAppName("kafka-sandbox").setMaster("local[*]");
JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
// 2-second micro-batch interval.
JavaStreamingContext streamingContext = new JavaStreamingContext(sparkContext, new Duration(2000));
// TODO: processing pipeline
// Receiverless direct stream only needs the broker list.
Map<String,String> kafkaConfig = new HashMap<>();
kafkaConfig.put("metadata.broker.list", "localhost:9092");
Set<String> topicSet = Collections.singleton("topic5");
JavaPairInputDStream<String, String> messages =
KafkaUtils.createDirectStream(streamingContext, String.class, String.class,
StringDecoder.class, StringDecoder.class, kafkaConfig, topicSet);
// Report each micro-batch's size, then dump every record value.
messages.foreachRDD(batch -> {
System.out.println("--- New RDD with " + batch.partitions().size()
+ " partitions and " + batch.count() + " records");
batch.foreach(record -> System.out.println(record._2));
});
streamingContext.start();
streamingContext.awaitTermination();
}
}
kafkatest2包;
导入java.util.Collections;
导入java.util.HashMap;
导入java.util.Map;
导入java.util.Set;
导入kafka.serializer.StringDecoder;
导入org.apache.spark.SparkConf;
导入org.apache.spark.api.java.JavaSparkContext;
导入org.apache.spark.streaming.Duration;
导入org.apache.spark.streaming.api.java.JavaPairInputStream;
导入org.apache.spark.streaming.api.java.JavaStreamingContext;
导入org.apache.spark.streaming.kafka.KafkaUtils;
导入org.apache.spark.streaming.Duration;
导入org.apache.spark.streaming.kafka010.*;
公共级SparkStream{
公共静态void main(字符串[]args){
SparkConf conf=新的SparkConf()
.setAppName(“卡夫卡沙盒”)
.setMaster(“本地[*]”);
JavaSparkContext sc=新的JavaSparkContext(conf);
JavaStreamingContext ssc=新的JavaStreamingContext(sc,新的持续时间(2000));
//TODO:处理管道
Map kafkaParams=新HashMap();
kafkaParams.put(“metadata.broker.list”,“localhost:9092”);
设置主题=Collections.singleton(“topic5”);
JavaPairInputStream directKafkaStream=KafkaUtils.createDirectStream(ssc,String.class,
String.class、StringDecoder.class、StringDecoder.class、kafkaParams、topics);
directKafkaStream.foreachRDD(rdd->{
System.out.println(“--New RDD with”+RDD.partitions().size()
+“分区和”+rdd.count()+“记录”);
rdd.foreach(记录->系统输出.println(记录._2));
});
ssc.start();
ssc.终止();
}
}
希望能有所帮助。
在您的代码中使用了错误的 StringDecoder：应该导入 kafka.serializer.StringDecoder，而不是 org.apache.commons.codec.StringDecoder。
正确的代码如下：
package kafkatest2;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.kafka010.*;
/** Working variant of the job; relies on kafka.serializer.StringDecoder being imported. */
public class SparkStream {
public static void main(String[] args) {
// Local master over all cores, 2000 ms batch interval.
SparkConf configuration = new SparkConf()
.setMaster("local[*]")
.setAppName("kafka-sandbox");
JavaStreamingContext context =
new JavaStreamingContext(new JavaSparkContext(configuration), new Duration(2000));
// TODO: processing pipeline
// Only the broker list is required for the Kafka 0.8 direct-stream API.
Map<String,String> params = new HashMap<String,String>();
params.put("metadata.broker.list", "localhost:9092");
Set<String> subscribedTopics = Collections.singleton("topic5");
JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
context, String.class, String.class, StringDecoder.class, StringDecoder.class,
params, subscribedTopics);
// Per micro-batch: log partition/record counts and print each record's value.
stream.foreachRDD(microBatch -> {
System.out.println("--- New RDD with " + microBatch.partitions().size()
+ " partitions and " + microBatch.count() + " records");
microBatch.foreach(rec -> System.out.println(rec._2));
});
context.start();
context.awaitTermination();
}
}
kafkatest2包;
导入java.util.Collections;
导入java.util.HashMap;
导入java.util.Map;
导入java.util.Set;
导入kafka.serializer.StringDecoder;
导入org.apache.spark.SparkConf;
导入org.apache.spark.api.java.JavaSparkContext;
导入org.apache.spark.streaming.Duration;
导入org.apache.spark.streaming.api.java.JavaPairInputStream;
导入org.apache.spark.streaming.api.java.JavaStreamingContext;
导入org.apache.spark.streaming.kafka.KafkaUtils;
导入org.apache.spark.streaming.Duration;
导入org.apache.spark.streaming.kafka010.*;
公共级SparkStream{
公共静态void main(字符串[]args){
SparkConf conf=新的SparkConf()
.setAppName(“卡夫卡沙盒”)
.setMaster(“本地[*]”);
JavaSparkContext sc=新的JavaSparkContext(conf);
JavaStreamingContext ssc=新的JavaStreamingContext(sc,新的持续时间(2000));
//TODO:处理管道
Map kafkaParams=新HashMap();
kafkaParams.put(“metadata.broker.list”,“localhost:9092”);
设置主题=Collections.singleton(“topic5”);
JavaPairInputStream directKafkaStream=KafkaUtils.createDirectStream(ssc,String.class,
String.class、StringDecoder.class、StringDecoder.class、kafkaParams、topics);
directKafkaStream.foreachRDD(rdd->{
System.out.println(“--New RDD with”+RDD.partitions().size()
+“分区和”+rdd.count()+“记录”);
rdd.foreach(记录->系统输出.println(记录._2));
});
ssc.start();
ssc.终止();
}
}
希望能有所帮助。
评论：你使用的是什么版本的 Spark 和 Kafka？——Spark 版本是 2.2.0，Kafka 版本是……（原文在此截断）