Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/312.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
java.util.UUID的Spark数据集的不同行为_Java_Apache Spark_Apache Spark Sql - Fatal编程技术网

java.util.UUID的Spark数据集的不同行为

java.util.UUID的Spark数据集的不同行为,java,apache-spark,apache-spark-sql,Java,Apache Spark,Apache Spark Sql,我正在使用Spark 2.0.0,并使用SparkSession创建一个数据集。当我在createDataFrame方法中使用java.util.UUID时,它工作得很好。但当我将java.util.UUID作为Javabean中的一个字段,并且当我使用这个Javabean创建数据集时,它会给我scala.MatchError。请参阅下面的代码和控制台日志。有谁能告诉我这里发生了什么,以及如何在Javabean类中使用UUID创建Dataset。谢谢 UUIDTest.java public c

我正在使用Spark 2.0.0,并使用
SparkSession
创建一个
数据集。当我在
createDataFrame
方法中使用
java.util.UUID
时,它工作得很好。但当我将
java.util.UUID
作为Javabean中的一个字段,并且当我使用这个Javabean创建数据集时,它会给我
scala.MatchError
。请参阅下面的代码和控制台日志。有谁能告诉我这里发生了什么,以及如何在Javabean类中使用
UUID
创建
Dataset
。谢谢

UUIDTest.java

public class UUIDTest {
  /**
   * Demonstrates Spark 2.0 bean-encoder behavior with java.util.UUID:
   * a bare UUID works with createDataFrame (its leastSignificantBits /
   * mostSignificantBits bean properties become columns), but a UUID used
   * as a field inside another JavaBean fails with scala.MatchError,
   * because Catalyst has no type converter for UUID. Workaround: keep the
   * id as a String field in the bean and rebuild it on the Java side with
   * UUID.fromString(...).
   */
  public static void main(String[] args) {
     SparkSession spark = SparkSession
              .builder()
              .appName("UUIDTest")
              // Was "/file:C:/temp" — a malformed URI (the scheme must come
              // first). Use a proper file URI for the warehouse directory.
              .config("spark.sql.warehouse.dir", "file:///C:/temp")
              .master("local[2]")
              .getOrCreate();

     System.out.println("====> Create Dataset using UUID");

     // Works: UUID itself exposes two long-valued bean properties that
     // Spark can map directly to columns.
     List<UUID> uuids = Arrays.asList(UUID.randomUUID(), UUID.randomUUID());
     Dataset<Row> uuidSet = spark.createDataFrame(uuids, UUID.class);
     uuidSet.show();

     System.out.println("====> Create Dataset using UserUUID");

     // Fails: Catalyst cannot convert the UUID-typed bean field and throws
     // scala.MatchError at this call (see the console log below).
     List<UserUUID> userUuids = Arrays.asList(
             new UserUUID(UUID.randomUUID()), new UserUUID(UUID.randomUUID()));
     Dataset<Row> userUuidSet = spark.createDataFrame(userUuids, UserUUID.class);
     userUuidSet.show();

     spark.stop();
   }
}
public class UserUUID implements Serializable{

private UUID uuid;

public UserUUID() {
}

public UserUUID(UUID uuid) {
    this.uuid = uuid;
}

public UUID getUuid() {
    return uuid;
}

public void setUuid(UUID uuid) {
    this.uuid = uuid;
  }
}
控制台输出

16/08/26 22:49:23 INFO SharedState: Warehouse path is '/file:C:/temp'.
====> Create Dataset using UUID
16/08/26 22:49:26 INFO CodeGenerator: Code generated in 248.230818 ms
16/08/26 22:49:26 INFO CodeGenerator: Code generated in 10.550477 ms
+--------------------+-------------------+
|leastSignificantBits|mostSignificantBits|
+--------------------+-------------------+
|-6786538026241948655|5045373365275148508|
|-9161219066266259673|6040751881536491488|
+--------------------+-------------------+

====> Create Dataset using UserUUID
Exception in thread "main" scala.MatchError: 4fa3941c-f312-4031-a61b-01f2acef751b (of class java.util.UUID)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:256)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:251)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$$anonfun$createToCatalystConverter$2.apply(CatalystTypeConverters.scala:403)
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1$$anonfun$apply$1.apply(SQLContext.scala:1106)
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1$$anonfun$apply$1.apply(SQLContext.scala:1106)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1.apply(SQLContext.scala:1106)
at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1.apply(SQLContext.scala:1104)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$class.toStream(Iterator.scala:1322)
at scala.collection.AbstractIterator.toStream(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toSeq(TraversableOnce.scala:298)
at scala.collection.AbstractIterator.toSeq(Iterator.scala:1336)
at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:373)
at com.UUIDTest.main(UUIDTest.java:30)
16/08/26 22:49:26 INFO SparkContext: Invoking stop() from shutdown hook

在面对这个问题时,为了让它正常工作,我做了很多尝试,最后找到的唯一解决方案是:使用
List&lt;String&gt;
而不是
List&lt;UUID&gt;
,当我需要使用 UUID 时,在 Java 层面进行映射,方法是:
UUID.fromString(uuidStr)

我遇到了完全相同的问题。你找到解决办法了吗?