Apache Spark: Connection refused for BlockManager process


I have set up a 7-node cluster on-premises on CentOS VMs. Earlier the VMs were co-located and everything worked fine, but since yesterday, after the VMs were spread across the data center (though still within the same rack), I have been getting Connection refused and IOException: Failed to connect errors.
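For background (not from the original report): the port in the errors below is the one a BlockManager bound to, and Spark picks it at random by default, which is why a network move like this often collides with host firewalls that only allow known ports. A minimal sketch, assuming a plain SparkSession-based job and purely hypothetical port numbers, of the configuration properties that pin those ports to fixed values:

    import org.apache.spark.sql.SparkSession

    // Hypothetical application name and port numbers, shown only to illustrate the settings;
    // the actual values must match whatever the firewall between the VMs allows.
    val spark = SparkSession.builder()
      .appName("port-pinning-sketch")
      // Port the driver listens on for RPC/scheduler traffic (random by default).
      .config("spark.driver.port", "40000")
      // Port the block managers (driver and executors) listen on for block fetches (random by default).
      .config("spark.blockManager.port", "40001")
      // Driver-specific override of the block manager port (available since Spark 2.1).
      .config("spark.driver.blockManager.port", "40002")
      // If a fixed port is busy on a node, Spark retries successive ports up to this many times.
      .config("spark.port.maxRetries", "16")
      .getOrCreate()

On YARN these settings would normally be passed with --conf on spark-submit or placed in spark-defaults.conf so that the executors launched on the worker nodes pick them up as well; hard-coding them in the driver, as above, is only meant to show which properties are involved.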

Logs:

2019-05-24 03:33:37 INFO  TorrentBroadcast:54 - Started reading broadcast variable 6
2019-05-24 03:33:37 ERROR RetryingBlockFetcher:143 - Exception while beginning fetch of 1 outstanding blocks 
java.io.IOException: Failed to connect to <HOST/IP>:38000
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:245)
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:187)
    at org.apache.spark.network.netty.NettyBlockTransferService$$anon$2.createAndStart(NettyBlockTransferService.scala:113)
    at org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:141)
    at org.apache.spark.network.shuffle.RetryingBlockFetcher.start(RetryingBlockFetcher.java:121)
    at org.apache.spark.network.netty.NettyBlockTransferService.fetchBlocks(NettyBlockTransferService.scala:123)
    at org.apache.spark.network.BlockTransferService.fetchBlockSync(BlockTransferService.scala:98)
    at org.apache.spark.storage.BlockManager.getRemoteBytes(BlockManager.scala:693)
    at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1.apply$mcVI$sp(TorrentBroadcast.scala:162)
    at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala:151)
    at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$readBlocks$1.apply(TorrentBroadcast.scala:151)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.apache.spark.broadcast.TorrentBroadcast.org$apache$spark$broadcast$TorrentBroadcast$$readBlocks(TorrentBroadcast.scala:151)
    at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1$$anonfun$apply$2.apply(TorrentBroadcast.scala:231)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1.apply(TorrentBroadcast.scala:211)
    at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1347)
    at org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:207)
    at org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute(TorrentBroadcast.scala:66)
    at org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast.scala:66)
    at org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:96)
    at org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)
    at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anonfun$readToUnsafeMem$1.apply(TextFileFormat.scala:125)
    at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anonfun$readToUnsafeMem$1.apply(TextFileFormat.scala:124)
    at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:148)
    at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:132)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:128)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:182)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:109)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:614)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
    at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
    at scala.collection.TraversableOnce$class.aggregate(TraversableOnce.scala:214)
    at scala.collection.AbstractIterator.aggregate(Iterator.scala:1336)
    at org.apache.spark.rdd.RDD$$anonfun$aggregate$1$$anonfun$21.apply(RDD.scala:1122)
    at org.apache.spark.rdd.RDD$$anonfun$aggregate$1$$anonfun$21.apply(RDD.scala:1122)
    at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:2130)
    at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:2130)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: <HOST/IP>:38000
    at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
    at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
    at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:323)
    at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:340)
    at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:633)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
    at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
    at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
    ... 1 more
Caused by: java.net.ConnectException: Connection refused
    ... 11 more
2019-05-24 03:33:37 INFO  RetryingBlockFetcher:164 - Retrying fetch (1/3) for 1 outstanding blocks after 5000 ms
2019-05-24 03:33:42 ERROR RetryingBlockFetcher:143 - Exception while beginning fetch of 1 outstanding blocks (after 1 retries)
java.io.IOException: Failed to connect to <HOST/IP>:38000
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:245)
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:187)
    at org.apache.spark.network.netty.NettyBlockTransferService$$anon$2.createAndStart(NettyBlockTransferService.scala:113)
    at org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:141)
    at org.apache.spark.network.shuffle.RetryingBlockFetcher.lambda$initiateRetry$0(RetryingBlockFetcher.java:169)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
    at java.lang.Thread.run(Thread.java:748)
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: <HOST/IP>:38000
    at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
    at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
    at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:323)
    at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:340)
    at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:633)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
    at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
    ... 2 more
Caused by: java.net.ConnectException: Connection refused
    ... 11 more
2019-05-24 03:33:42 INFO  RetryingBlockFetcher:164 - Retrying fetch (2/3) for 1 outstanding blocks after 5000 ms
2019-05-24 03:33:47 ERROR RetryingBlockFetcher:143 - Exception while beginning fetch of 1 outstanding blocks (after 2 retries)
java.io.IOException: Failed to connect to <HOST/IP>:38000
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:245)
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:187)
    at org.apache.spark.network.netty.NettyBlockTransferService$$anon$2.createAndStart(NettyBlockTransferService.scala:113)
    at org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:141)
    at org.apache.spark.network.shuffle.RetryingBlockFetcher.lambda$initiateRetry$0(RetryingBlockFetcher.java:169)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
    at java.lang.Thread.run(Thread.java:748)
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Connection refused: <HOST/IP>:38000
    at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
    at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
    at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:323)
    at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:340)
    at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:633)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:580)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:497)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:459)
    at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
    ... 2 more
Caused by: java.net.ConnectException: Connection refused
    ... 11 more