Scala: unable to save an RDD[String] as a text file using saveAsTextFile

When I try to write an RDD to a text file on HDFS, as shown below, I get an error.

val rdd = sc.textFile("/user/hadoop/dxld801/test.txt")
val filtered = rdd.map(line => line.replace("\\N", "NULL"))
filtered.saveAsTextFile("hdfs:///user/hadoop/dxld801/test.txt")
Error:

java.lang.RuntimeException: java.lang.RuntimeException:
> java.lang.ClassNotFoundException: Class
> org.apache.hadoop.mapred.DirectFileOutputCommitter not found
>         at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1927)
>         at org.apache.hadoop.mapred.JobConf.getOutputCommitter(JobConf.java:722)
>         at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:983)
>         at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)
>         at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:965)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:148)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:109)
>         at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
>         at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:965)
>         at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:897)
>         at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:897)
>         at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:897)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:148)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:109)
>         at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
>         at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:896)
>         at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1400)
>         at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1379)
>         at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1379)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:148)
>         at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:109)
>         at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
>         at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1379)
>         at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)
>         at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:31)
>         at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
>         at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)
>         at $iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
>         at $iwC$$iwC$$iwC.<init>(<console>:39)
>         at $iwC$$iwC.<init>(<console>:41)
>         at $iwC.<init>(<console>:43)
>         at <init>(<console>:45)
>         at .<init>(<console>:49)
>         at .<clinit>(<console>)
>         at .<init>(<console>:7)
>         at .<clinit>(<console>)
>         at $print(<console>)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
>         at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
>         at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
>         at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
>         at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
>         at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
>         at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
>         at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
>         at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
>         at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
>         at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
>         at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
>         at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
>         at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
>         at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
>         at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
>         at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
>         at org.apache.spark.repl.Main$.main(Main.scala:31)
>         at org.apache.spark.repl.Main.main(Main.scala)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:664)
>         at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:169)
>         at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:192)
>         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:111)
>         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.RuntimeException:
> java.lang.ClassNotFoundException: Class
> org.apache.hadoop.mapred.DirectFileOutputCommitter not found
>         at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1895)
>         at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1919)
>         ... 68 more
> Caused by: java.lang.ClassNotFoundException: Class org.apache.hadoop.mapred.DirectFileOutputCommitter not found
>         at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1801)
>         at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:1893)
>         ... 69 more
The root cause: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.mapred.DirectFileOutputCommitter not found

I am running all of the above in spark-shell, and my Spark version is 1.4.0.

This is the command I used to launch the shell:

$SPARK_HOME/bin/spark-shell --packages com.databricks:spark-csv_2.10:1.2.0 --jars /home/hadoop/lib/native/hadoop-lzo-0.4.14.jar

I tried googling for this class "DirectFileOutputCommitter", but it seems this class doesn't exist anywhere at all.
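From the trace, JobConf.getOutputCommitter is resolving the committer class from the Hadoop configuration (the "mapred.output.committer.class" property), so the name most likely comes from my cluster's mapred-site.xml rather than from Spark itself; as far as I can tell, a DirectFileOutputCommitter ships with some vendor Hadoop builds (Amazon EMR, for example) but not with stock Apache Hadoop. Here is a minimal diagnostic sketch I could run in the same spark-shell session, assuming it is acceptable to fall back to the stock committer:

// Inspect which committer the cluster config asks for
val committerClass = sc.hadoopConfiguration.get("mapred.output.committer.class")
println(s"mapred.output.committer.class = $committerClass")

// If it names the missing class, fall back to the stock committer
// before retrying filtered.saveAsTextFile(...)
sc.hadoopConfiguration.set(
  "mapred.output.committer.class",
  "org.apache.hadoop.mapred.FileOutputCommitter")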
