
Scala Spark Streaming HiveContext NullPointerException


I'm writing a Spark Streaming application with Spark 1.6.0 on a CDH 5.8.3 cluster. The application is very simple: it reads from Kafka, applies some transformations to the DStreams/RDDs, and writes them out to a Hive table. I also tried writing some dummy sample code that just uses the sqlContext, but the error persists.

My problem is that I cannot use the HiveContext inside the foreachRDD statement of a DStream.

My code looks like this:

import kafka.serializer.StringDecoder
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Minutes, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils

val sc = new SparkContext()
val sqlContext = new HiveContext(sc)
val ssc = new StreamingContext(sc, Minutes(sparkBatchInterval))
ssc.checkpoint(CHECKPOINT_DIR)
ssc.sparkContext.setLogLevel("WARN")

val kafkaParams = Map[String, String]("metadata.broker.list" -> brokersList, "auto.offset.reset" -> "smallest")
val dstream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, Set(kafkaTopic))

val validatedAndPersisted = dstream.transform(rdd => {...}).persist(StorageLevel.MEMORY_AND_DISK_SER)
val recordsToBeIngested = ...
recordsToBeIngested.foreachRDD(rdd => {
  rdd.persist(StorageLevel.MEMORY_AND_DISK)

  val ingestCount = rdd.count
  if (ingestCount > 0) {
    sqlContext.tables("sc4").show() // here I should actually have an insertInto
  }
})
The error I get is:

Exception in thread "main" java.lang.NullPointerException
    at org.apache.spark.sql.hive.client.ClientWrapper.conf(ClientWrapper.scala:205)
    at org.apache.spark.sql.hive.HiveContext.hiveconf$lzycompute(HiveContext.scala:554)
    at org.apache.spark.sql.hive.HiveContext.hiveconf(HiveContext.scala:553)
    at org.apache.spark.sql.hive.HiveContext$$anonfun$configure$1.apply(HiveContext.scala:540)
    at org.apache.spark.sql.hive.HiveContext$$anonfun$configure$1.apply(HiveContext.scala:539)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.immutable.List.foreach(List.scala:318)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
    at scala.collection.AbstractTraversable.map(Traversable.scala:105)
    at org.apache.spark.sql.hive.HiveContext.configure(HiveContext.scala:539)
    at org.apache.spark.sql.hive.HiveContext.metadataHive$lzycompute(HiveContext.scala:252)
    at org.apache.spark.sql.hive.HiveContext.metadataHive(HiveContext.scala:239)
    at org.apache.spark.sql.hive.HiveContext$$anon$2.<init>(HiveContext.scala:459)
    at org.apache.spark.sql.hive.HiveContext.catalog$lzycompute(HiveContext.scala:459)
    at org.apache.spark.sql.hive.HiveContext.catalog(HiveContext.scala:458)
    at org.apache.spark.sql.hive.HiveContext$$anon$3.<init>(HiveContext.scala:475)
    at org.apache.spark.sql.hive.HiveContext.analyzer$lzycompute(HiveContext.scala:475)
    at org.apache.spark.sql.hive.HiveContext.analyzer(HiveContext.scala:474)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34)
    at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
    at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52)
    at org.apache.spark.sql.SQLContext.tables(SQLContext.scala:855)
    at myPackage.Ingestion$$anonfun$createStreamingContext$1.apply(Ingestion.scala:173)
    at myPackage.Ingestion$$anonfun$createStreamingContext$1.apply(Ingestion.scala:166)
    at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:661)
    at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:661)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:50)
    at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:426)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:49)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
    at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:49)
    at scala.util.Try$.apply(Try.scala:161)
    at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:224)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:224)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
    at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:223)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
Do you have any idea what is causing this error, or how I can fix it?

Thanks,
Marco

I found the answer myself. The problem was that I was creating the HiveContext before the StreamingContext. Moving its creation to after the StreamingContext was created solved the problem.
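For reference, a minimal sketch of the corrected initialization order (sparkBatchInterval and CHECKPOINT_DIR are the placeholders from the question's code, not real values):

val sc = new SparkContext()
val ssc = new StreamingContext(sc, Minutes(sparkBatchInterval)) // StreamingContext first...
val sqlContext = new HiveContext(sc)                            // ...then the HiveContext
ssc.checkpoint(CHECKPOINT_DIR)
ssc.sparkContext.setLogLevel("WARN")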

Could you share the whole code at my email address? tutu.singh001@gmail.com

Just swap lines 2 and 3.