Spark在查询Hadoop时提供InvalidProtocolBufferException
在Hadoop 2 namenode上的Spark(0.9.1)shell中运行此示例Spark在查询Hadoop时提供InvalidProtocolBufferException,hadoop,apache-spark,Hadoop,Apache Spark,在Hadoop 2 namenode上的Spark(0.9.1)shell中运行此示例 scala> val file1 = sc.textFile("hdfs://testhadoopname1.myserver.com:9000/user/ubuntu/events/datepart=2014-04-11/2014-04-11-09-42.txt") 为什么我会得到这个 java.io.IOException: Failed on local exception: com.googl
scala> val file1 = sc.textFile("hdfs://testhadoopname1.myserver.com:9000/user/ubuntu/events/datepart=2014-04-11/2014-04-11-09-42.txt")
为什么我会得到这个
java.io.IOException: Failed on local exception: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).; Host Details : local host is: "testhadoopname1.myserver.com/10.255.187.229"; destination host is: "testhadoopname1.myserver.com":9000;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764)
at org.apache.hadoop.ipc.Client.call(Client.java:1351)
at org.apache.hadoop.ipc.Client.call(Client.java:1300)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:622)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:651)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1679)
at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1106)
at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1102)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1102)
at org.apache.hadoop.fs.FileSystem.globStatusInternal(FileSystem.java:1701)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1647)
at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:222)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:140)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:898)
at org.apache.spark.rdd.RDD.count(RDD.scala:726)
at $iwC$$iwC$$iwC$$iwC.(:15)
at $iwC$$iwC$$iwC.(:20)
at $iwC$$iwC.(:22)
at $iwC.(:24)
at (:26)
at .(:30)
at .()
at .(:7)
at .()
at $print()
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:622)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:772)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1040)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:609)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:640)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:604)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:793)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:838)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:750)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:598)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:605)
at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:608)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:931)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:881)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:973)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
Caused by: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).
at com.google.protobuf.InvalidProtocolBufferException.invalidTag(InvalidProtocolBufferException.java:89)
at com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:108)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.(RpcHeaderProtos.java:1398)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.(RpcHeaderProtos.java:1362)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1492)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1487)
at com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
at com.google.protobuf.AbstractParser.parsePartialDelimitedFrom(AbstractParser.java:241)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:253)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:259)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:49)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parseDelimitedFrom(RpcHeaderProtos.java:2364)
at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:996)
at org.apache.hadoop.ipc.Client$Connection.run(Client.java:891)
java.io.IOException:本地异常失败:com.google.protobuf.InvalidProtocolBufferException:协议消息包含无效标记(零)。;主机详细信息:本地主机为:“testhadoopname1.myserver.com/10.255.187.229”;目标主机是:“testhadoopname1.myserver.com”:9000;
位于org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764)
位于org.apache.hadoop.ipc.Client.call(Client.java:1351)
位于org.apache.hadoop.ipc.Client.call(Client.java:1300)
位于org.apache.hadoop.ipc.protobufrpceengine$Invoker.invoke(protobufrpceengine.java:206)
位于com.sun.proxy.$Proxy14.getFileInfo(未知源)
在sun.reflect.NativeMethodAccessorImpl.invoke0(本机方法)处
在sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)中
在sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)中
位于java.lang.reflect.Method.invoke(Method.java:622)
位于org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186)
位于org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
位于com.sun.proxy.$Proxy14.getFileInfo(未知源)
位于org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:651)
位于org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1679)
位于org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1106)
位于org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1102)
位于org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
位于org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1102)
位于org.apache.hadoop.fs.FileSystem.globStatusInternal(FileSystem.java:1701)
位于org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1647)
位于org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:222)
位于org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
位于org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:140)
位于org.apache.spark.rdd.rdd$$anonfun$partitions$2.apply(rdd.scala:207)
位于org.apache.spark.rdd.rdd$$anonfun$partitions$2.apply(rdd.scala:205)
在scala.Option.getOrElse(Option.scala:120)
位于org.apache.spark.rdd.rdd.partitions(rdd.scala:205)
位于org.apache.spark.rdd.mapperdd.getPartitions(mapperdd.scala:28)
位于org.apache.spark.rdd.rdd$$anonfun$partitions$2.apply(rdd.scala:207)
位于org.apache.spark.rdd.rdd$$anonfun$partitions$2.apply(rdd.scala:205)
在scala.Option.getOrElse(Option.scala:120)
位于org.apache.spark.rdd.rdd.partitions(rdd.scala:205)
位于org.apache.spark.SparkContext.runJob(SparkContext.scala:898)
位于org.apache.spark.rdd.rdd.count(rdd.scala:726)
在$iwC$$iwC$$iwC$$iwC。(:15)
在$iwC$$iwC$$iwC。(:20)
$iwC$$iwC。(:22)
iwC美元。(:24)
时(26分)
在。(:30)
在
在。(:7)
在
$print()
在sun.reflect.NativeMethodAccessorImpl.invoke0(本机方法)处
在sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)中
在sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)中
位于java.lang.reflect.Method.invoke(Method.java:622)
在org.apache.spark.repl.SparkIMain$ReadEvalPrint.call上(SparkIMain.scala:772)
位于org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1040)
在org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:609)
在org.apache.spark.repl.SparkIMain.exploration上(SparkIMain.scala:640)
在org.apache.spark.repl.SparkIMain.exploration上(SparkIMain.scala:604)
在org.apache.spark.repl.SparkILoop.really上解释$1(SparkILoop.scala:793)
位于org.apache.spark.repl.SparkILoop.interpretatingstartingwith(SparkILoop.scala:838)
位于org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:750)
位于org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:598)
位于org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:605)
位于org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:608)
在org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:931)
在org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply上(SparkILoop.scala:881)
在org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply上(SparkILoop.scala:881)
位于scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
位于org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:881)
位于org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:973)
位于org.apache.spark.repl.Main$.Main(Main.scala:31)
位于org.apache.spark.repl.Main.Main(Main.scala)
原因:com.google.protobuf.InvalidProtocolBufferException:协议消息包含无效标记(零)。
位于com.google.protobuf.InvalidProtocolBufferException.invalidTag(InvalidProtocolBufferException.java:89)
位于com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:108)
在org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.(RpcHeaderProtos.java:1398)
在org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.(RpcHeaderProtos.java:1362)
位于org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1492)
在org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1487)
位于com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
位于com.google.protobuf.AbstractParser.parsePartialDelimitedFrom(AbstractParser.java:241)
位于com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:253)
在com.google上。
scala> file1.count()
java.io.IOException: Failed on local exception: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).; Host Details : local host is: "testhadoopname1.myserver.com/10.255.187.229"; destination host is: "testhadoopname1.myserver.com":9000;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764)
at org.apache.hadoop.ipc.Client.call(Client.java:1351)
at org.apache.hadoop.ipc.Client.call(Client.java:1300)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:622)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:651)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1679)
at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1106)
at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1102)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1102)
at org.apache.hadoop.fs.FileSystem.globStatusInternal(FileSystem.java:1701)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1647)
at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:222)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:140)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:898)
at org.apache.spark.rdd.RDD.count(RDD.scala:726)
at $iwC$$iwC$$iwC$$iwC.(:15)
at $iwC$$iwC$$iwC.(:20)
at $iwC$$iwC.(:22)
at $iwC.(:24)
at (:26)
at .(:30)
at .()
at .(:7)
at .()
at $print()
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:622)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:772)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1040)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:609)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:640)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:604)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:793)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:838)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:750)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:598)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:605)
at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:608)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:931)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:881)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:973)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
Caused by: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).
at com.google.protobuf.InvalidProtocolBufferException.invalidTag(InvalidProtocolBufferException.java:89)
at com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:108)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.(RpcHeaderProtos.java:1398)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.(RpcHeaderProtos.java:1362)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1492)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1487)
at com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
at com.google.protobuf.AbstractParser.parsePartialDelimitedFrom(AbstractParser.java:241)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:253)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:259)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:49)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parseDelimitedFrom(RpcHeaderProtos.java:2364)
at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:996)
at org.apache.hadoop.ipc.Client$Connection.run(Client.java:891)