
Spark Streaming on a VPS: connection to the Flume Spark sink refused

Tags: apache-spark, spark-streaming, flume

I'm running a word-count experiment with Spark Streaming (2.2.0) and Flume (1.6.0) using the pull-based approach. Everything worked on my local VM, so I moved the whole setup online to a VPS on Vultr, managed through a third-party panel, with the security group configured.
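For reference, the pull-based approach needs the spark-streaming-flume module on the application side (and the matching spark-streaming-flume-sink jar on the Flume agent's classpath). A minimal build sketch, assuming sbt and Scala 2.11 (neither is stated in the question; the Spark version matches the 2.2.0 above):

    // build.sbt (sketch; sbt and Scala 2.11 are assumptions)
    scalaVersion := "2.11.8"

    libraryDependencies ++= Seq(
      "org.apache.spark" %% "spark-streaming"       % "2.2.0",
      "org.apache.spark" %% "spark-streaming-flume" % "2.2.0"
    )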

Here is what I did:

  • Started the Flume agent, with the source bound to port 44444 and the Spark sink bound to 41414 (a sketch of the configuration follows this list). Checked with netstat -anp | grep 44444 and netstat -anp | grep 41414, and both ports show the expected listeners, so Flume is working fine.
  • Ran telnet localhost 44444 to simulate input.
  • Tested my program from IDEA, which is where the problem shows up.
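For context, here is a minimal sketch of the Flume configuration implied by the steps above. The agent, source, channel, and sink names (a1, r1, c1, k1) are placeholders; the ports and the sink class follow the pull-based setup:

    # flume-spark.conf (sketch; component names are hypothetical)
    a1.sources  = r1
    a1.channels = c1
    a1.sinks    = k1

    # netcat source on 44444, fed by the telnet step
    a1.sources.r1.type = netcat
    a1.sources.r1.bind = 0.0.0.0
    a1.sources.r1.port = 44444

    a1.channels.c1.type = memory

    # Spark sink for the pull-based approach, listening on 41414
    a1.sinks.k1.type = org.apache.spark.streaming.flume.sink.SparkSink
    a1.sinks.k1.hostname = 0.0.0.0
    a1.sinks.k1.port = 41414

    a1.sources.r1.channels = c1
    a1.sinks.k1.channel = c1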
Here is the error message:

    20/09/11 13:39:33 ERROR ReceiverTracker: Deregistered receiver for stream 0: Error starting receiver 0 - java.io.IOException: Error connecting to hadoop/96.30.196.34:41414
        at org.apache.avro.ipc.NettyTransceiver.getChannel(NettyTransceiver.java:261)
        at org.apache.avro.ipc.NettyTransceiver.<init>(NettyTransceiver.java:203)
        at org.apache.avro.ipc.NettyTransceiver.<init>(NettyTransceiver.java:138)
        at org.apache.spark.streaming.flume.FlumePollingReceiver$$anonfun$onStart$1.apply(FlumePollingInputDStream.scala:83)
        at org.apache.spark.streaming.flume.FlumePollingReceiver$$anonfun$onStart$1.apply(FlumePollingInputDStream.scala:82)
        at scala.collection.immutable.List.foreach(List.scala:381)
        at org.apache.spark.streaming.flume.FlumePollingReceiver.onStart(FlumePollingInputDStream.scala:82)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor.startReceiver(ReceiverSupervisor.scala:149)
        at org.apache.spark.streaming.receiver.ReceiverSupervisor.start(ReceiverSupervisor.scala:131)
        at org.apache.spark.streaming.scheduler.ReceiverTracker$ReceiverTrackerEndpoint$$anonfun$9.apply(ReceiverTracker.scala:607)
        at org.apache.spark.streaming.scheduler.ReceiverTracker$ReceiverTrackerEndpoint$$anonfun$9.apply(ReceiverTracker.scala:597)
        at org.apache.spark.SparkContext$$anonfun$34.apply(SparkContext.scala:2173)
        at org.apache.spark.SparkContext$$anonfun$34.apply(SparkContext.scala:2173)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
    Caused by: java.net.ConnectException: Connection refused: hadoop/96.30.196.34:41414
        at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
        at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:714)
        at org.jboss.netty.channel.socket.nio.NioClientBoss.connect(NioClientBoss.java:152)
        at org.jboss.netty.channel.socket.nio.NioClientBoss.processSelectedKeys(NioClientBoss.java:105)
        at org.jboss.netty.channel.socket.nio.NioClientBoss.process(NioClientBoss.java:79)
        at org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:337)
        at org.jboss.netty.channel.socket.nio.NioClientBoss.run(NioClientBoss.java:42)
        at org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108)
        at org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42)
        ... 3 more
    
What could cause this connection-refused error on the VPS?

The hostname and port are set in the program arguments as hadoop 41414, and hadoop is mapped in the hosts file to the VPS's public IP, along the lines of the entry below.
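A hosts entry consistent with the address in the stack trace (my reconstruction, not the actual file):

    # /etc/hosts
    96.30.196.34    hadoop

Here is my wordcount example: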

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.flume.FlumeUtils
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    object Count {

      def main(args: Array[String]): Unit = {

        if (args.length != 2) {
          System.err.println("Usage: FlumePullWordCount <hostname> <port>")
          System.exit(1)
        }

        val Array(hostname, port) = args
        val sparkConf = new SparkConf().setAppName("Count").setMaster("local[2]")
        val ssc = new StreamingContext(sparkConf, Seconds(5))

        // Pull-based stream: poll the Spark sink at <hostname>:<port> (41414 per the Flume conf)
        val flumeStream = FlumeUtils.createPollingStream(ssc, hostname, port.toInt)

        // Decode each Flume event body, then do the usual word count
        flumeStream.map(x => new String(x.event.getBody.array()).trim)
          .flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()

        ssc.start()
        ssc.awaitTermination()
      }
    }
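In IDEA the two program arguments are hadoop and 41414; launched from the command line instead, this would be roughly as follows (the jar name is hypothetical):

    spark-submit --master "local[2]" --class Count \
      --packages org.apache.spark:spark-streaming-flume_2.11:2.2.0 \
      count.jar hadoop 41414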
    