Java ClassNotFoundException HadoopMapReduceCommitProtocol

Tags: java, hadoop, apache-spark, classnotfoundexception, apache-spark-standalone

I am trying to run a Spark example in local mode, but I get the following stack trace:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/internal/io/HadoopMapReduceCommitProtocol
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.apache.spark.sql.internal.SQLConf$.<init>(SQLConf.scala:383)
at org.apache.spark.sql.internal.SQLConf$.<clinit>(SQLConf.scala)
at org.apache.spark.sql.internal.StaticSQLConf$$anonfun$buildConf$1.apply(SQLConf.scala:930)
at org.apache.spark.sql.internal.StaticSQLConf$$anonfun$buildConf$1.apply(SQLConf.scala:928)
at org.apache.spark.internal.config.TypedConfigBuilder$$anonfun$createWithDefault$1.apply(ConfigBuilder.scala:122)
at org.apache.spark.internal.config.TypedConfigBuilder$$anonfun$createWithDefault$1.apply(ConfigBuilder.scala:122)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.internal.config.TypedConfigBuilder.createWithDefault(ConfigBuilder.scala:122)
at org.apache.spark.sql.internal.StaticSQLConf$.<init>(SQLConf.scala:937)
at org.apache.spark.sql.internal.StaticSQLConf$.<clinit>(SQLConf.scala)
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$sessionStateClassName(SparkSession.scala:962)
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:111)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:878)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:878)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:99)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:99)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:230)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
at scala.collection.mutable.HashMap.foreach(HashMap.scala:99)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:878)
at com.megaport.PipelineExample$.main(PipelineExample.scala:37)
at com.megaport.PipelineExample.main(PipelineExample.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)

Caused by: java.lang.ClassNotFoundException: org.apache.spark.internal.io.HadoopMapReduceCommitProtocol
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)

If the class is not in the Java libraries you already have, you should download the dependency JAR and add it to your classpath. Check this for more details.
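
The question builds with Maven, but the principle is the same in any build tool: every Spark artifact has to come from one and the same released version. As a rough sketch, an equivalent sbt build for this example could look like the following (the project name, Scala version and Spark version are assumptions; use the release that matches the Spark you actually run):

// build.sbt -- illustrative sketch only; pin every Spark artifact to a single released version
name := "pipeline-example"        // hypothetical project name

scalaVersion := "2.11.8"          // assumed Scala version for a Spark 2.x build

val sparkVersion = "2.1.0"        // assumption: replace with the Spark release you actually have

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"  % sparkVersion,
  "org.apache.spark" %% "spark-sql"   % sparkVersion,
  "org.apache.spark" %% "spark-mllib" % sparkVersion
)

Mixing artifacts from different releases (for example an older spark-core with a newer spark-sql) is exactly the kind of setup that produces a NoClassDefFoundError for a class that only exists in the newer release.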


Check the dependencies in your POM file. If the dependencies are correct, and the class is in a JAR in the Maven Central repo, then Maven should download that JAR and put it in your local repo.

The problem is that the class is not in the JAR files Maven pulled down, and not in the JARs that ship with the Spark distribution either. I used jarfind.com to try to work out where to get it, with no result... I can see the source file in the GitHub repository, but I can't tell which JAR it ends up in...

I suspect you are (in effect) depending on a version of Spark that is ahead of the latest release published to Maven Central. You would need to check out the source and do a local build, then put the artifacts into your local repo... or get rid of anything that depends on a pre-release version from GitHub.

I'll give that a try, Stephen C. Stephen C - you were right - please post that as an answer and I'll mark it as correct. It looks like the doco was just cut over from the 2.1.0 version...
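
Before doing a local build, it can help to confirm whether the class is on the runtime classpath at all and, if so, which JAR provides it. The following is a minimal diagnostic sketch (the object name is made up for illustration; the class name is the one from the stack trace):

// Diagnostic sketch: report which JAR, if any, provides the missing class at runtime.
object WhichJar {
  def main(args: Array[String]): Unit = {
    val name = "org.apache.spark.internal.io.HadoopMapReduceCommitProtocol"
    try {
      val cls = Class.forName(name)
      // getCodeSource can be null for JDK classes; Spark classes normally come from a JAR
      val location = Option(cls.getProtectionDomain.getCodeSource).map(_.getLocation)
      println(s"$name loaded from ${location.getOrElse("an unknown location")}")
    } catch {
      case _: ClassNotFoundException =>
        println(s"$name is not on the runtime classpath")
    }
  }
}

If the class resolves, the printed JAR path tells you which Spark version is actually on the classpath; if it does not, the code was compiled against a newer Spark than the one it is running with. For reference, the full PipelineExample.scala from the question follows.
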
// scalastyle:off println
package com.megaport

// $example on$
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.sql.Row
// $example off$
import org.apache.spark.sql.SparkSession

object PipelineExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("My Spark Application")  // optional and will be autogenerated if not specified
      .master("local[*]")               // avoid hardcoding the deployment environment
      // .enableHiveSupport()           // self-explanatory, isn't it?
      .getOrCreate

    // $example on$
    // Prepare training documents from a list of (id, text, label) tuples.
    val training = spark.createDataFrame(Seq(
      (0L, "a b c d e spark", 1.0),
      (1L, "b d", 0.0),
      (2L, "spark f g h", 1.0),
      (3L, "hadoop mapreduce", 0.0)
    )).toDF("id", "text", "label")

    // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
    val tokenizer = new Tokenizer()
      .setInputCol("text")
      .setOutputCol("words")
    val hashingTF = new HashingTF()
      .setNumFeatures(1000)
      .setInputCol(tokenizer.getOutputCol)
      .setOutputCol("features")
    val lr = new LogisticRegression()
      .setMaxIter(10)
      .setRegParam(0.01)
    val pipeline = new Pipeline()
      .setStages(Array(tokenizer, hashingTF, lr))

    // Fit the pipeline to training documents.
    val model = pipeline.fit(training)

    // Now we can optionally save the fitted pipeline to disk
    model.write.overwrite().save("/tmp/spark-logistic-regression-model")

    // We can also save this unfit pipeline to disk
    pipeline.write.overwrite().save("/tmp/unfit-lr-model")

    // And load it back in during production
    val sameModel = PipelineModel.load("/tmp/spark-logistic-regression-model")

    // Prepare test documents, which are unlabeled (id, text) tuples.
    val test = spark.createDataFrame(Seq(
      (4L, "spark i j k"),
      (5L, "l m n"),
      (6L, "mapreduce spark"),
      (7L, "apache hadoop")
    )).toDF("id", "text")

    // Make predictions on test documents.
    model.transform(test)
      .select("id", "text", "probability", "prediction")
      .collect()
      .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
        println(s"($id, $text) --> prob=$prob, prediction=$prediction")
      }
    // $example off$

    spark.stop()
  }
}