
Apache Spark: getting an empty set when reading data from Kafka with Spark Streaming

Tags: apache-spark, apache-kafka, spark-streaming, spark-dataframe

Hi, I am new to Spark Streaming. I am trying to read an XML file and send it to a Kafka topic. Here is my Kafka producer code, which sends data to the Kafka console consumer.

Code:

package org.apache.kafka.Kafka_Producer;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

@SuppressWarnings("unused")
public class KafkaProducer { 
   private static String sCurrentLine;
   public static void main(String args[]) throws InterruptedException, ExecutionException{ 
       try (BufferedReader br = new BufferedReader(new FileReader("/Users/sreeharsha/Downloads/123.txt")))
       {
           while ((sCurrentLine = br.readLine()) != null) {
               System.out.println(sCurrentLine);
               kafka(sCurrentLine);
           }
       } catch (FileNotFoundException e) {
           // TODO Auto-generated catch block
           e.printStackTrace();
       } catch (IOException e) {
           // TODO Auto-generated catch block
           e.printStackTrace();}
   }
   public static void kafka(String sCurrentLine)  {
       // Configure the old (Kafka 0.8) producer API
       Properties props = new Properties();
       props.put("metadata.broker.list", "localhost:9092");
       props.put("serializer.class", "kafka.serializer.StringEncoder");
       props.put("partitioner.class","kafka.producer.DefaultPartitioner");
       props.put("request.required.acks", "1");
       ProducerConfig config = new ProducerConfig(props);
       // Note: a new producer is created and closed for every line;
       // this works, but is inefficient for large files
       Producer<String, String> producer = new Producer<String, String>(config);
       producer.send(new KeyedMessage<String, String>("sample", sCurrentLine));
       producer.close();
   }
}
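
To verify that the producer is publishing, you can watch the topic with the console consumer that ships with Kafka 0.8 (the script path and ZooKeeper address below are assumptions for a default local install):

./kafka-console-consumer.sh --zookeeper localhost:2181 --topic sample --from-beginning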
Below you can see a screenshot of how the data is received:

[Screenshot: Kafka console consumer output]

I am using the following versions:

  • Spark 2.0.0
  • ZooKeeper 3.4.6
  • Kafka 0.8.2.1


Can anyone give me any suggestions?

After searching around online, I finally found the solution:

Do not use both spark-submit and setMaster() at the same time:

  • If you run the code from an IDE, use setMaster() in the code
  • If you run the jar through spark-submit, do not put setMaster() in the code (see the sketch after this list)
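
A minimal sketch of the difference (the ConfExamples class below is purely illustrative):

import org.apache.spark.SparkConf;

public class ConfExamples {
    // Running inside an IDE: the master must be set in code
    static SparkConf ideConf() {
        return new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
    }

    // Running through spark-submit: leave the master out of the code
    // and pass it on the command line instead (e.g. --master local[4])
    static SparkConf submitConf() {
        return new SparkConf()
                .setAppName("kafka-sandbox");
    }
}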
One more thing: run/submit the Spark jar first, and only then send data from the Kafka console producer.
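
For example, with the console producer that ships with Kafka 0.8 (script path and broker address are assumptions for a default local install):

./kafka-console-producer.sh --broker-list localhost:9092 --topic sample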


With that order, everything works fine.

Comments:

  • Where is the code for your SparkReceiver class? You have posted a SparkStringConsumer class in which you use the topic "mytopic", but in the KafkaProducer class you send messages to the topic "sample". Can you check that?
  • Updated it now. Can you check once more?
  • I tried producing new messages from the Kafka console, and I am still getting an empty set when reading them in Spark.
  • That is not the problem. For testing, use this in your producer class instead of reading from a file: Random random = new Random(); while (true) { kafka("Test-" + random.nextInt(100)); Thread.sleep(500); } Also check what StringDecoder.class resolves to in your SparkStringConsumer class. It should be import kafka.serializer.StringDecoder;

Here is the updated SparkStringConsumer class:
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import kafka.serializer.StringDecoder;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

public class SparkStringConsumer {

   public static void main(String[] args) {

       SparkConf conf = new SparkConf()
               .setAppName("kafka-sandbox")
               // keep setMaster only when running from an IDE; drop it
               // when submitting with spark-submit (see the answer above)
               .setMaster("local[*]");
       JavaSparkContext sc = new JavaSparkContext(conf);
       JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

       Map<String, String> kafkaParams = new HashMap<>();
       kafkaParams.put("metadata.broker.list", "localhost:9092");
       Set<String> topics = Collections.singleton("sample");

       JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(
               ssc, String.class, String.class, StringDecoder.class, StringDecoder.class,
               kafkaParams, topics);
       directKafkaStream.foreachRDD(rdd -> {
           System.out.println("--- New RDD with " + rdd.partitions().size()
                   + " partitions and " + rdd.count() + " records");
           rdd.foreach(record -> System.out.println(record._2));
       });
       ssc.start();
       ssc.awaitTermination();
   }
}
./spark-submit --class org.apache.spark_streaming.Spark_Kafka_Streaming.SparkStringConsumer --master local[4] Spark_Kafka_Streaming-0.0.1-SNAPSHOT.jar
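
A runnable version of the test producer suggested in the comments (a minimal sketch: the TestProducer class name is illustrative, and it assumes it lives in the same package as the KafkaProducer class above so it can reuse its kafka() helper):

package org.apache.kafka.Kafka_Producer;

import java.util.Random;

public class TestProducer {
    public static void main(String[] args) throws InterruptedException {
        Random random = new Random();
        // Send a random test message to the "sample" topic every 500 ms,
        // instead of reading lines from a file
        while (true) {
            KafkaProducer.kafka("Test-" + random.nextInt(100));
            Thread.sleep(500);
        }
    }
}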