Spark gives InvalidProtocolBufferException when querying Hadoop

Running this example in the Spark (0.9.1) shell against a Hadoop 2 NameNode:

scala> val file1 = sc.textFile("hdfs://testhadoopname1.myserver.com:9000/user/ubuntu/events/datepart=2014-04-11/2014-04-11-09-42.txt")
scala> file1.count()

Why do I get this?

java.io.IOException: Failed on local exception: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).; Host Details : local host is: "testhadoopname1.myserver.com/10.255.187.229"; destination host is: "testhadoopname1.myserver.com":9000;
    at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764)
    at org.apache.hadoop.ipc.Client.call(Client.java:1351)
    at org.apache.hadoop.ipc.Client.call(Client.java:1300)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
    at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:622)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
    at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:651)
    at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1679)
    at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1106)
    at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1102)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1102)
    at org.apache.hadoop.fs.FileSystem.globStatusInternal(FileSystem.java:1701)
    at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1647)
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:222)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:140)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
    at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:898)
    at org.apache.spark.rdd.RDD.count(RDD.scala:726)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:15)
    at $iwC$$iwC$$iwC.<init>(<console>:20)
    at $iwC$$iwC.<init>(<console>:22)
    at $iwC.<init>(<console>:24)
    at <init>(<console>:26)
    at .<init>(<console>:30)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:622)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:772)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1040)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:609)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:640)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:604)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:793)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:838)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:750)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:598)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:605)
    at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:608)
    at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:931)
    at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
    at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:881)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:973)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
Caused by: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).
    at com.google.protobuf.InvalidProtocolBufferException.invalidTag(InvalidProtocolBufferException.java:89)
    at com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:108)
    at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:1398)
    at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:1362)
    at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1492)
    at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1487)
    at com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
    at com.google.protobuf.AbstractParser.parsePartialDelimitedFrom(AbstractParser.java:241)
    at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:253)
    at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:259)
    at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:49)
    at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parseDelimitedFrom(RpcHeaderProtos.java:2364)
    at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:996)
    at org.apache.hadoop.ipc.Client$Connection.run(Client.java:891)
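The frames above show the failure happening inside the NameNode getFileInfo RPC that Hadoop issues while listing the input, so it should be reproducible without Spark at all. A minimal sketch using only the plain Hadoop client API (the host:port and path are simply the ones from the example above; nothing else is assumed):

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Talk to the same host:port from the failing trace, with the bare HDFS client.
val fs = FileSystem.get(new URI("hdfs://testhadoopname1.myserver.com:9000"), new Configuration())
// exists() goes through getFileStatus, i.e. the getFileInfo RPC seen in the trace.
println(fs.exists(new Path("/user/ubuntu/events/datepart=2014-04-11/2014-04-11-09-42.txt")))

If this throws the same InvalidProtocolBufferException, the problem sits between the HDFS client and the NameNode rather than in Spark; a client talking to the wrong RPC port, or mismatched Hadoop client/server versions, are the usual reasons an RPC response header fails to parse.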
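In that case, one way to rule out a hard-coded port mismatch is to let the cluster configuration supply the filesystem URI. A minimal sketch, assuming core-site.xml (fs.defaultFS, or fs.default.name in older configs) is on the driver's classpath; the :8020 in the comment is only an illustrative value, not this cluster's known port:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem

// Read the configured default filesystem instead of hard-coding host:9000.
val defaultFs = FileSystem.getDefaultUri(new Configuration())  // e.g. hdfs://testhadoopname1.myserver.com:8020
val file1 = sc.textFile(s"$defaultFs/user/ubuntu/events/datepart=2014-04-11/2014-04-11-09-42.txt")
file1.count()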