
Spark/Scala - project runs fine in IntelliJ but throws an error when run with SBT

I have a Spark project that I run locally from IntelliJ, and it works fine when run from there. The project is very simple and is currently just a toy example. Here is the code:

package mls.main


import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import java.nio.file.{Paths, Files}
import scala.io.Source


object Main {

  def main(args: Array[String]) {
    import org.apache.log4j.Logger
    import org.apache.log4j.Level
    print("HELLO WORLD!")
    Logger.getLogger("org").setLevel(Level.WARN)
    Logger.getLogger("akka").setLevel(Level.WARN)

    // fire up spark
    val sc = createContext
    val sqlContext = new SQLContext(sc)
    loadAHSData(List("x"),sqlContext)

  }

  def loadAHSData(years: List[String], sqlContext : SQLContext) : Unit = {
    // load the column names that exist in all 3 datasets
    val columns = sqlContext.sparkContext
      .textFile("data/common_columns.txt")
      .collect()
      .toSeq

    columns.foreach(println)
  }


  def createContext(appName: String, masterUrl: String): SparkContext = {
    val conf = new SparkConf().setAppName(appName).setMaster(masterUrl)
    new SparkContext(conf)
  }

  def createContext(appName: String): SparkContext = createContext(appName, "local")

  def createContext: SparkContext = createContext("Data Application", "local")
}
When I run this through IntelliJ, I get the correct output of the two columns from the specified text file. However, when I cd into the project directory and run sbt run, I see the "HELLO WORLD!" output, but it then fails with the following stack trace:

java.lang.ClassNotFoundException: scala.None$
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1826)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1713)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2000)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:309)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

17/12/13 09:52:14 WARN FileSystem: exception in the cleaner thread but it will continue to run
java.lang.InterruptedException
    at java.lang.Object.wait(Native Method)
    at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143)
    at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164)
    at org.apache.hadoop.fs.FileSystem$Statistics$StatisticsDataReferenceCleaner.run(FileSystem.java:2989)
    at java.lang.Thread.run(Thread.java:748)
17/12/13 09:52:14 ERROR Utils: uncaught error in thread SparkListenerBus, stopping SparkContext
java.lang.InterruptedException
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998)
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
    at java.util.concurrent.Semaphore.acquire(Semaphore.java:312)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(LiveListenerBus.scala:80)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:78)
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:77)
17/12/13 09:52:14 ERROR Utils: throw uncaught fatal error in thread SparkListenerBus
java.lang.InterruptedException
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998)
    at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
    at java.util.concurrent.Semaphore.acquire(Semaphore.java:312)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(LiveListenerBus.scala:80)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:78)
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279)
    at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:77)
17/12/13 09:52:14 ERROR ContextCleaner: Error in cleaning thread
java.lang.InterruptedException
    at java.lang.Object.wait(Native Method)
    at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143)
    at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:181)
    at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279)
    at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178)
    at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73)
My build.sbt looks like this:

name := "MLS_scala"

version := "0.1"

scalaVersion := "2.11.1"

resolvers ++= Seq(
  Resolver.sonatypeRepo("releases"),
  Resolver.sonatypeRepo("snapshots")
)

val sparkVersion = "2.2.0"
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion,
  "org.apache.spark" %% "spark-sql" % sparkVersion,
  "org.apache.spark" %% "spark-mllib" % sparkVersion,
  "org.apache.spark" %% "spark-streaming" % sparkVersion,
  "org.apache.spark" %% "spark-hive" % sparkVersion
)

I can't figure out why it works perfectly from IntelliJ but throws this error from sbt. Please let me know if there are any steps I can take to resolve this. Thanks!

This may be related to scala-library versioning. Try adding this to your build.sbt:

fork := true
Or, to apply it only to the run task:

fork in run := true
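
For reference, here is a minimal sketch of how that would look appended to the build.sbt from the question (fork in run is a standard sbt key; the comments are mine):

// Appended to the existing build.sbt:
// run the application in its own forked JVM rather than inside
// the JVM that hosts sbt itself.
fork in run := true

With forking enabled, the application gets a fresh JVM and classpath, which is typically what makes the ClassNotFoundException: scala.None$ above go away.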

You have to compile the project with sbt before running it: use sbt compile and make sure the project builds successfully before you do sbt run. Here is a link to a guide.

What version of sbt are you using? Do you have a project/build.properties file, and if so, what is in it?

This works, but can someone explain it a bit more? @Thusitha it means sbt runs your application in a separate JVM instead of inside the JVM running sbt itself, which is the default behavior. A framework like Spark pushes JVM tuning to its limits, so it is easy to see how forking avoids some trouble. See, for example, if I wasn't precise enough.