Scala Spark 2.0-无法从其他对象访问UDF
我在Spark 2.0.1中访问自定义项时遇到问题。在Scala Spark 2.0-无法从其他对象访问UDF,scala,apache-spark,user-defined-functions,udf,Scala,Apache Spark,User Defined Functions,Udf,我在Spark 2.0.1中访问自定义项时遇到问题。在Task.scala中,withColumn从Util.scala调用函数。这 抛出由以下原因引起的:java.lang.UnsupportedOperationException:不支持Char类型的架构异常,我无法找到此Char的来源 如果我将我的UDF放在Task.scala中,它可以正常工作 package main.scala import java.time.DayOfWeek import java.time.LocalDat
Task.scala
中,withColumn
从Util.scala
调用函数。这
抛出由以下原因引起的:java.lang.UnsupportedOperationException:不支持Char类型的架构
异常,我无法找到此Char
的来源
如果我将我的UDF放在Task.scala
中,它可以正常工作
package main.scala
import java.time.DayOfWeek
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
object Task {

  /** Returns a function mapping a timestamp string to its day-of-week name
    * (e.g. "MONDAY").
    *
    * Assumes the input looks like "M/d/yy ..." — only the text before the
    * first space is parsed as a date. Throws DateTimeParseException for
    * inputs whose date part does not match the "M/d/yy" pattern.
    */
  def extractDay : (String => String) = (date: String) => {
    // Keep only the date portion before the first space (drops the time part).
    val date_parsed : String = date.split(" ")(0)
    val formatter : DateTimeFormatter = DateTimeFormatter.ofPattern("M/d/yy")
    val local : DayOfWeek = LocalDate.parse(date_parsed, formatter).getDayOfWeek()
    local.toString
  }

  // Fixed: procedure syntax `def main(...) { }` is deprecated — declare the
  // Unit result type explicitly.
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().getOrCreate()
    import spark.implicits._ // needed for the $"Time" column syntax below
    // Wrap extractDay as a Spark SQL UDF (String => String).
    val col_extractDay = udf(extractDay)
    // Load LabStat Usage
    val df_usage = spark.read
      .format("com.databricks.spark.csv")
      .option("header", "true")
      .option("parserLib", "UNIVOCITY")
      .option("inferSchema", "true")
      .load("data/mydata.csv")
      .toDF
    df_usage.show()
    df_usage.printSchema()
    // Derive a "Day" column from the "Time" column via the UDF.
    val df_usage2 = df_usage.withColumn("Day", col_extractDay($"Time"))
  }
}
但是如果我将extractDay
放在Util.scala
和 import main.scala.util.Util._
中,它会突然抛出这个异常……为什么?
package main.scala.util
import java.time.DayOfWeek
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import org.apache.spark.SparkContext._
import org.apache.spark.sql.functions._
object Util {

  /** Maps a timestamp string such as "1/5/17 10:30" to the name of its
    * weekday (e.g. "THURSDAY"). Only the text before the first space is
    * parsed, using the "M/d/yy" date pattern.
    */
  def extractDay : (String => String) = (raw: String) => {
    val datePart = raw.split(" ").head
    val pattern = DateTimeFormatter.ofPattern("M/d/yy")
    LocalDate.parse(datePart, pattern).getDayOfWeek.toString
  }

  // NOTE(review): `udf(...)` runs eagerly during this object's static
  // initializer, so any failure inside it surfaces at the caller as
  // ExceptionInInitializerError on first access to Util — consistent with
  // the reported stack trace (Util$.<clinit>). The "Schema for type Char"
  // error points at Util.scala:130, which is not visible in this listing;
  // presumably another UDF in the full file returns a Char — verify there.
  val col_extractDay = udf(extractDay)
}
例外消息是:
Exception in thread "main" java.lang.ExceptionInInitializerError
at main.scala.Task8$.main(Task8.scala:49)
at main.scala.Task8.main(Task8.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.UnsupportedOperationException: Schema for type Char is not supported
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:716)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:654)
at org.apache.spark.sql.functions$.udf(functions.scala:2837)
at main.scala.util.Util$.<init>(Util.scala:130)
at main.scala.util.Util$.<clinit>(Util.scala)
... 11 more
线程“main”中的异常:java.lang.ExceptionInInitializerError
位于main.scala.Task8$.main(Task8.scala:49)
位于main.scala.Task8.main(Task8.scala)
在sun.reflect.NativeMethodAccessorImpl.invoke0(本机方法)处
位于sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
在sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)中
位于java.lang.reflect.Method.invoke(Method.java:498)
位于org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
位于org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
位于org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
位于org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
位于org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
原因:java.lang.UnsupportedOperationException:不支持Char类型的架构
位于org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:716)
位于org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:654)
位于org.apache.spark.sql.functions$.udf(functions.scala:2837)
位于main.scala.util.Util$.&lt;init&gt;(Util.scala:130)
位于main.scala.util.Util$.&lt;clinit&gt;(Util.scala)
... 还有11个