Apache spark 由于阶段故障,火花作业中止

Apache spark 由于阶段故障,火花作业中止,apache-spark,apache-spark-ml,Apache Spark,Apache Spark Ml,我试图在spark shell中加载一个随机森林模型: /apache/spark-1.6.0-SNAPSHOT/bin/spark-shell \    --master yarn-client  \ --driver-java-options "-XX:MaxPermSize=4096m -XX:+UseConcMarkSweepGC" \ --executor-memory 12G \ --executor-cores 4 \ --num-executors

我试图在spark shell中加载一个随机森林模型:

/apache/spark-1.6.0-SNAPSHOT/bin/spark-shell \
   --master yarn-client  \
    --driver-java-options "-XX:MaxPermSize=4096m -XX:+UseConcMarkSweepGC" \
    --executor-memory 12G \
    --executor-cores 4 \
    --num-executors 1000 \
    --conf "spark.network.timeout=200000" \
    --verbose
然后:

当我加载一个较小的50树模型时,它工作正常,当我切换到100树时,它给出:

org.apache.spark.SparkException: Job aborted due to stage failure: Exception while getting task result: org.apache.spark.storage.BlockFetchException: Failed to fetch block from 1 locations. Most recent failure cause:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1328)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
    at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1342)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.first(RDD.scala:1341)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:48)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:50)
    at $iwC$$iwC$$iwC.<init>(<console>:52)
    at $iwC$$iwC.<init>(<console>:54)
    at $iwC.<init>(<console>:56)
    at <init>(<console>:58)
    at .<init>(<console>:62)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
org.apache.spark.sparkeexception:作业因阶段失败而中止:获取任务结果时异常:org.apache.spark.storage.BlockFetchException:未能从1个位置获取块。最近的故障原因:
位于org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
位于org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
位于org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
位于scala.collection.mutable.resizeblearray$class.foreach(resizeblearray.scala:59)
位于scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
位于org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
位于org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
位于org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
位于scala.Option.foreach(Option.scala:236)
位于org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
位于org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
位于org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
位于org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
位于org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
位于org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
位于org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
位于org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
位于org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
在org.apache.spark.rdd.rdd$$anonfun$take$1.apply上(rdd.scala:1328)
位于org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
位于org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
位于org.apache.spark.rdd.rdd.withScope(rdd.scala:316)
位于org.apache.spark.rdd.rdd.take(rdd.scala:1302)
位于org.apache.spark.rdd.rdd$$anonfun$first$1.apply(rdd.scala:1342)
位于org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
位于org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
位于org.apache.spark.rdd.rdd.withScope(rdd.scala:316)
位于org.apache.spark.rdd.rdd.first(rdd.scala:1341)
在$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC。(:37)
在$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC。(:42)
在$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC。(:44)
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC。(:46)
在$iwC$$iwC$$iwC$$iwC$$iwC。(:48)
在$iwC$$iwC$$iwC$$iwC。(:50)
在$iwC$$iwC$$iwC。(:52)
$iwC$$iwC。(:54)
$iwC。(:56)
在(:58)
在。(:62)
在
在。(:7)
在
$print()
在sun.reflect.NativeMethodAccessorImpl.invoke0(本机方法)处
在sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)中
在sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)中
位于java.lang.reflect.Method.invoke(Method.java:606)
位于org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
位于org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
在org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
在org.apache.spark.repl.SparkIMain.exploration上(SparkIMain.scala:871)
在org.apache.spark.repl.SparkIMain.exploration上(SparkIMain.scala:819)
在org.apache.spark.repl.SparkILoop.really上解释$1(SparkILoop.scala:857)
位于org.apache.spark.repl.SparkILoop.interpretatingstartingwith(SparkILoop.scala:902)
位于org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
位于org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
位于org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
位于org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
在org.apache.spark.repl.sparkilop$$anonfun$org$apache$spark$repl$sparkilop$$process$1.apply$mcZ$sp(sparkilop.scala:997)
在org.apache.spark.repl.sparkilop$$anonfun$org$apache$spark$repl$sparkilop$$process$1.apply(sparkilop.scala:945)
在org.apache.spark.repl.sparkilop$$anonfun$org$apache$spark$repl$sparkilop$$process$1.apply(sparkilop.scala:945)
位于scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
在org.apache.spark.repl.sparkloop.org$apache$spark$repl$sparkloop$$process上(sparkloop.scala:945)
位于org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
位于org.apache.spark.repl.Main$.Main(Main.scala:31)
位于org.apache.spark.repl.Main.Main(Main.scala)
在sun.reflect.NativeMethodAccessorImpl.invoke0(本机方法)处
在sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)中
在sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)中
位于java.lang.reflect.Method.invoke(Method.java:606)
位于org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
位于org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
位于org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
位于org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
位于org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

这似乎是一个参数设置,我可以更改以使其工作,执行器内存已经是12G,我尝试加载的模型只有~400MB(100tree),200MB(50tree)

是否还有其他错误,p
org.apache.spark.SparkException: Job aborted due to stage failure: Exception while getting task result: org.apache.spark.storage.BlockFetchException: Failed to fetch block from 1 locations. Most recent failure cause:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1328)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
    at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1342)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.first(RDD.scala:1341)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:48)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:50)
    at $iwC$$iwC$$iwC.<init>(<console>:52)
    at $iwC$$iwC.<init>(<console>:54)
    at $iwC.<init>(<console>:56)
    at <init>(<console>:58)
    at .<init>(<console>:62)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)