Apache Spark:Spark Atlas 连接器启动期间出现空指针异常(NullPointerException)
我正在尝试运行一个作业,用来测试 Spark 与 Atlas 的集成。这是一个简单的作业:从一个 topic 读取,再写入另一个 topic。Apache Spark:Spark Atlas 连接器启动期间出现空指针异常(NullPointerException),apache-spark,hortonworks-data-platform,apache-atlas,Apache Spark,Hortonworks Data Platform,Apache Atlas,我正在尝试运行一个作业,用来测试 Spark 与 Atlas 的集成。这是一个简单的作业:从一个 topic 读取,再写入另一个 topic。 val sparkConf = new SparkConf() .setAppName("atlas-test") .setMaster("local[2]") .set("spark.extraListeners", "com.hortonworks.spark.atlas.SparkAtlasEventTracker") .set("s
// Spark configuration for a local two-thread run that registers the
// Spark Atlas Connector trackers as Spark / SQL / streaming listeners.
val sparkConf = new SparkConf()
  .setAppName("atlas-test")
  .setMaster("local[2]")
  .set("spark.extraListeners", "com.hortonworks.spark.atlas.SparkAtlasEventTracker")
  .set("spark.sql.queryExecutionListeners", "com.hortonworks.spark.atlas.SparkAtlasEventTracker")
  .set(
    "spark.sql.streaming.streamingQueryListeners",
    "com.hortonworks.spark.atlas.SparkAtlasStreamingQueryEventTracker")

// Session built from the configuration above, with Hive support enabled.
val spark = SparkSession.builder()
  .config(sparkConf)
  .enableHiveSupport()
  .getOrCreate()
import spark.implicits._

// Batch read of the source topic.
// BROKER_SERVERS and clusterName are defined outside this snippet.
val df = spark.read
  .format("kafka")
  .option("kafka.bootstrap.servers", BROKER_SERVERS)
  .option("subscribe", "foobar2")
  // The Kafka source option is spelled "startingOffsets" (plural); the
  // singular "startingOffset" is not an option documented by the source.
  .option("startingOffsets", "earliest")
  .option("kafka.atlas.cluster.name", clusterName)
  .load()

println("---------------------------------------------")
df.printSchema()

// String view of key/value, used only for the console preview below.
val dfs = df
  .selectExpr("CAST(key as STRING)", "CAST(value AS STRING)")
  .as[(String, String)]
dfs.show()
println("---------------------------------------------")

// Write the records read above (the original `df`, not the string view)
// to the output topic.
df.write
  .format("kafka")
  .option("kafka.bootstrap.servers", BROKER_SERVERS)
  .option("topic", "foobar-out")
  .option("kafka.atlas.cluster.name", clusterName)
  .save()
代码看起来都没有问题。于是我尝试在 IDE(IntelliJ)中运行这个作业,但几乎每次都会遇到下面这个异常:
19/08/12 17:00:08 WARN SparkExecutionPlanProcessor: Caught exception during parsing event
java.lang.NullPointerException
at org.apache.spark.sql.internal.SQLConf$$anonfun$14.apply(SQLConf.scala:133)
at org.apache.spark.sql.internal.SQLConf$$anonfun$14.apply(SQLConf.scala:133)
at scala.Option.map(Option.scala:146)
at org.apache.spark.sql.internal.SQLConf$.get(SQLConf.scala:133)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.simpleString(SaveIntoDataSourceCommand.scala:52)
at org.apache.spark.sql.catalyst.plans.QueryPlan.verboseString(QueryPlan.scala:177)
at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:548)
at org.apache.spark.sql.catalyst.trees.TreeNode.treeString(TreeNode.scala:472)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$4.apply(QueryExecution.scala:197)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$4.apply(QueryExecution.scala:197)
at org.apache.spark.sql.execution.QueryExecution.stringOrError(QueryExecution.scala:99)
at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:197)
at com.hortonworks.spark.atlas.sql.CommandsHarvester$.com$hortonworks$spark$atlas$sql$CommandsHarvester$$getPlanInfo(CommandsHarvester.scala:214)
at com.hortonworks.spark.atlas.sql.CommandsHarvester$.com$hortonworks$spark$atlas$sql$CommandsHarvester$$makeProcessEntities(CommandsHarvester.scala:222)
at com.hortonworks.spark.atlas.sql.CommandsHarvester$SaveIntoDataSourceHarvester$.harvest(CommandsHarvester.scala:183)
at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor$$anonfun$2.apply(SparkExecutionPlanProcessor.scala:108)
at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor$$anonfun$2.apply(SparkExecutionPlanProcessor.scala:89)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor.process(SparkExecutionPlanProcessor.scala:89)
at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor.process(SparkExecutionPlanProcessor.scala:63)
at com.hortonworks.spark.atlas.AbstractEventProcessor$$anonfun$eventProcess$1.apply(AbstractEventProcessor.scala:72)
at com.hortonworks.spark.atlas.AbstractEventProcessor$$anonfun$eventProcess$1.apply(AbstractEventProcessor.scala:71)
at scala.Option.foreach(Option.scala:257)
at com.hortonworks.spark.atlas.AbstractEventProcessor.eventProcess(AbstractEventProcessor.scala:71)
at com.hortonworks.spark.atlas.AbstractEventProcessor$$anon$1.run(AbstractEventProcessor.scala:38)
我正在使用spark 2.4.0和scala 2.11
我对结果也有些困惑。老实说,我不清楚这个作业运行之后,我的 Atlas(本地机器)中应该出现什么?因为有时作业能成功运行,但 Atlas 中没有显示任何内容。您是否验证过列名和类型是否正确?我刚遇到了同样的问题(堆栈跟踪相同),原因是列名/类型有误,更正后问题似乎就解决了。你找到解决方案了吗?有解决方案吗?