Scala SparkException: Job aborted due to stage failure: NullPointerException when using Spark GraphX

I am new to Scala and I am looking for a way to solve this error.


The scenario I am working on is the following. I have three tables:

  • user: contains the ID and the name

  • business: contains the ID and the name

  • review: contains user.ID and business.ID

Only users write reviews, and only businesses receive them. The graph looks like this: [figure omitted: users linked to the businesses they reviewed]

What I am looking for is this:

For every user, I want to know the other users who reviewed the same businesses.
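For reference, that requirement can also be phrased as a plain self-join on the review table. A minimal sketch using the column names above (an aside, independent of the GraphX approach that follows):

val coReviewersSql = sqlContext.sql(
  """SELECT a.user_id, b.user_id AS co_reviewer
    |FROM review a
    |JOIN review b ON a.business_id = b.business_id
    |WHERE a.user_id <> b.user_id""".stripMargin)  // pairs of users sharing at least one business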

To create the graph I do the following:

val users = sqlContext.sql("Select user_id as ID from user")
val business = sqlContext.sql("Select business_id as ID from business")
// append both id sets into one table; its rows will become the graph vertices
users.write.mode(SaveMode.Append).saveAsTable("user_busin_db")
business.write.mode(SaveMode.Append).saveAsTable("user_busin_db")
val user_bus = sqlContext.sql("Select ID from user_busin_db")
val reviews = sqlContext.sql("Select user_id, business_id from review")
The table user_bus will be used to create the vertices.

After that, I created the graph with GraphX using this code:

def str2Long(s: String) = s.##.toLong  // derive a numeric vertex id from the string id

val vertex: RDD[(VertexId, String)] = user_bus.rdd.map(x => (str2Long(x(0).asInstanceOf[String]), x(0).asInstanceOf[String]))
val edge: RDD[Edge[String]] = reviews.rdd.map(row => Edge(str2Long(row(0).asInstanceOf[String]), str2Long(row(1).asInstanceOf[String]), "review"))

val default = "missing"  // attribute for vertices referenced by an edge but absent from the vertex RDD
val myGraph = Graph(vertex, edge, default)

myGraph.cache()
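As an aside, str2Long is hashCode based (s.## is String.hashCode for strings), so two distinct ids can in principle map to the same vertex id. A quick illustration with a classic collision pair:

str2Long("Aa") == str2Long("BB")  // true: both hash to 2112, so "Aa" and "BB" would collapse into one vertex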
Now, to answer my question, I tried to aggregate messages for the other users and businesses with this code:

val userAggregate: VertexRDD[List[Long]] = myGraph.aggregateMessages[List[Long]](triplet => {
    triplet.sendToSrc(List(triplet.dstId))   // each user collects the businesses it reviewed
  },
  (a, b) => a.union(b)
)

val businessAggregate: VertexRDD[List[Long]] = myGraph.aggregateMessages[List[Long]](triplet => {
    triplet.sendToDst(List(triplet.srcId))   // each business collects its reviewers
  },
  (a, b) => a.union(b)
)
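To make the shape of these aggregates concrete, here is a tiny self-contained run with hypothetical ids (assuming a spark-shell session, so sc is in scope):

import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

val toyVertices: RDD[(VertexId, String)] = sc.parallelize(Seq((1L, "userA"), (2L, "userB"), (10L, "business1")))
val toyEdges: RDD[Edge[String]] = sc.parallelize(Seq(Edge(1L, 10L, "review"), Edge(2L, 10L, "review")))
val toyGraph = Graph(toyVertices, toyEdges, "missing")

// the same user-side aggregation as above: each user collects the businesses it reviewed
val toyUserAggr = toyGraph.aggregateMessages[List[Long]](t => t.sendToSrc(List(t.dstId)), _ ++ _)
toyUserAggr.collect()  // Array((1,List(10)), (2,List(10)))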
Then comes the code that gives the error. To collect, for each user, the reviews of the other users, I wrote this:

userAggregate.map(userAggr =>
  (userAggr._1, userAggr._2.flatMap(userAggrListElem =>
    userAggr._2.patch(
      0,
      businessAggregate.filter(busAggr => busAggr._1 == userAggrListElem).map(row => row._2).take(1)(0),
      userAggr._2.size + 1))))
That is: (user ID, List(user IDs that reviewed the same businesses, ...)).

If I try to call .collect or .count on it, I get this error:

org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 138.0 failed 1 times, most recent failure: Lost task 1.0 in stage 138.0 (TID 2807, localhost): java.lang.NullPointerException
at org.apache.spark.graphx.impl.VertexRDDImpl.mapVertexPartitions(VertexRDDImpl.scala:94)
    at org.apache.spark.graphx.VertexRDD.filter(VertexRDD.scala:98)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5$$anonfun$apply$1.apply(<console>:102)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5$$anonfun$apply$1.apply(<console>:101)
    at scala.collection.immutable.List.flatMap(List.scala:327)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5.apply(<console>:101)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5.apply(<console>:100)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
    at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1769)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1134)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1134)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
    at org.apache.spark.scheduler.Task.run(Task.scala:86)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:314)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1890)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1903)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1916)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1930)
    at org.apache.spark.rdd.RDD.count(RDD.scala:1134)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:105)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:115)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:117)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:119)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:121)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:123)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:125)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:127)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:129)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:131)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:133)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:135)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:137)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:139)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:141)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:143)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:145)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:147)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw.<init>(<console>:149)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw.<init>(<console>:151)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw.<init>(<console>:153)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw.<init>(<console>:155)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$eval$.$print$lzycompute(<console>:7)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$eval$.$print(<console>:6)
Caused by: java.lang.NullPointerException
    at org.apache.spark.graphx.impl.VertexRDDImpl.mapVertexPartitions(VertexRDDImpl.scala:94)
    at org.apache.spark.graphx.VertexRDD.filter(VertexRDD.scala:98)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5$$anonfun$apply$1.apply(<console>:102)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5$$anonfun$apply$1.apply(<console>:101)
    at scala.collection.immutable.List.flatMap(List.scala:327)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5.apply(<console>:101)
    at linea6ec9c0b0ced4184a0288c57eb3bdda585.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$5.apply(<console>:100)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
    at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1769)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1134)
    at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1134)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
    at org.apache.spark.scheduler.Task.run(Task.scala:86)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:314)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:748)


Now I suspect there is a problem with the vertices: some unconnected vertex is giving me the null pointer error, but I cannot find it in order to remove it from the graph. What can I do to solve this?

TL;DR This is not valid Spark code.


This is the expected outcome. Nested transformations are not allowed in Apache Spark, so you cannot access businessAggregate inside the closure of userAggregate.map.

So there is no way to perform the operation I was trying to do? And why does it work if I use take()?
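For what it is worth, the mechanism behind the error is that an RDD referenced inside another RDD's closure is deserialized as null on the executors, which is the NullPointerException in the trace above; take() returns a plain local Array on the driver, so a variant that iterates over such an array no longer nests distributed operations, which is presumably why it appeared to work. One way to express the intended computation without nesting is to flatten userAggregate into (businessId, userId) pairs and join against businessAggregate. A sketch building only on the definitions above (intermediate names are mine, not from the original answer):

import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

// expand each user's business list into (businessId, userId) pairs
val userByBusiness: RDD[(VertexId, VertexId)] =
  userAggregate.flatMap { case (userId, businesses) =>
    businesses.map(businessId => (businessId, userId))
  }

// businessAggregate is already keyed by business id, so a pair-RDD join lines the two up
val coReviewers: RDD[(VertexId, List[Long])] =
  userByBusiness
    .join(businessAggregate)                         // (businessId, (userId, reviewers))
    .map { case (_, (userId, reviewers)) => (userId, reviewers) }
    .reduceByKey(_ ++ _)                             // merge lists across shared businesses
    .mapValues(_.distinct)                           // drop duplicate co-reviewers

// coReviewers is (user ID, List(user IDs that reviewed the same businesses, ...)),
// still including each user itself, which can be filtered out if unwanted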