Convert complex nested JSON to a Spark DataFrame in Java

Tags: java, json, apache-spark, dataframe, nested

Can someone help me convert the JSON below into a Spark DataFrame using Java code?

Note: it is not a file.

Logic: listen to Kafka topic T1, read each record in the RDD, apply additional logic to transform the resulting data into a JSON object, and then write it to another Kafka topic T2 (a rough sketch of this pipeline follows the JSON below).

The structure of T2 is as follows.

JSON:

 [  
   {  
      "@tenant_id":"XYZ",
      "alarmUpdateTime":1526342400000,
      "alarm_id":"AB5C9123",
      "alarm_updates":[  
         {  
            "alarmField":"Severity",
            "new_value":"Minor",
            "old_value":"Major"
         },
         {  
            "alarmField":"state",
            "new_value":"UPDATE",
            "old_value":"NEW"
         }
      ],
      "aucID":"5af83",
      "inID":"INC15234567",
      "index":"test",
      "product":"test",
      "source":"ABS",
      "state":"NEW"
   }
]
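
For context, here is a minimal sketch of the T1-to-T2 pipeline described above, written against the spark-streaming-kafka-0-10 integration. The broker address, group id, serializer settings and the pass-through transform are assumptions for illustration, not part of the original setup:

    import java.util.*;
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.apache.kafka.common.serialization.StringDeserializer;
    import org.apache.spark.SparkConf;
    import org.apache.spark.streaming.Durations;
    import org.apache.spark.streaming.api.java.JavaInputDStream;
    import org.apache.spark.streaming.api.java.JavaStreamingContext;
    import org.apache.spark.streaming.kafka010.ConsumerStrategies;
    import org.apache.spark.streaming.kafka010.KafkaUtils;
    import org.apache.spark.streaming.kafka010.LocationStrategies;

    public class T1ToT2Pipeline {
        public static void main(String[] args) throws Exception {
            SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("t1-to-t2");
            JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));

            Map<String, Object> kafkaParams = new HashMap<>();
            kafkaParams.put("bootstrap.servers", "localhost:9092");   // assumed broker
            kafkaParams.put("key.deserializer", StringDeserializer.class);
            kafkaParams.put("value.deserializer", StringDeserializer.class);
            kafkaParams.put("group.id", "alarm-updates");              // assumed group id

            JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
                    jssc,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.<String, String>Subscribe(Collections.singletonList("T1"), kafkaParams));

            stream.foreachRDD(rdd -> rdd.foreachPartition(records -> {
                // one producer per partition; producer settings are assumptions
                Properties props = new Properties();
                props.put("bootstrap.servers", "localhost:9092");
                props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
                props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
                try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
                    while (records.hasNext()) {
                        String json = records.next().value();   // the record-level "additional logic" would go here
                        producer.send(new ProducerRecord<>("T2", json));
                    }
                }
            }));

            jssc.start();
            jssc.awaitTermination();
        }
    }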
Classes created:

    class Alarm {

        String tenant_id;          // "@tenant_id" in the JSON; '@' is not legal in a Java identifier
        String alarm_id;
        .
        .
        List<AlarmUpdate> update;
        // getters and setters for all variables
    }

    class AlarmUpdate {

        String alarmField;
        String oldVal;
        String newVal;

        // getters and setters for all variables
    }

    class AppClass {

        public static void main(String[] args) {
            Alarm alarmObj = new Alarm();
            // set values for the variables in alarmObj

            // jobCtx holds the application's SparkSession
            Dataset<Row> results = jobCtx.getSparkSession()
                    .createDataFrame(Arrays.asList(alarmObj), Alarm.class);

            // At this point the following errors are seen.
        }
    }
Errors:

    2018-05-15 13:40:48 ERROR JobScheduler - Error running job streaming job 1526406040000 ms.0
    scala.MatchError: com.ca.AlarmUpdate.AlarmUpdate@48c8809b (of class com.ca.AlarmUpdate.AlarmUpdate)
        at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:236) ~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:231) ~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103) ~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:170) ~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:154) ~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103) ~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.catalyst.CatalystTypeConverters$$anonfun$createToCatalystConverter$2.apply(CatalystTypeConverters.scala:379) ~[spark-catalyst_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1$$anonfun$apply$1.apply(SQLContext.scala:1105) ~[spark-sql_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1$$anonfun$apply$1.apply(SQLContext.scala:1105) ~[spark-sql_2.11-2.2.0.jar:2.2.0]
        at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186) ~[jaf-sdk-2.4.0.jar:?]
        at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1.apply(SQLContext.scala:1105) ~[spark-sql_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.sql.SQLContext$$anonfun$beansToRows$1.apply(SQLContext.scala:1103) ~[spark-sql_2.11-2.2.0.jar:2.2.0]
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.Iterator$class.toStream(Iterator.scala:1322) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.AbstractIterator.toStream(Iterator.scala:1336) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.TraversableOnce$class.toSeq(TraversableOnce.scala:298) ~[jaf-sdk-2.4.0.jar:?]
        at scala.collection.AbstractIterator.toSeq(Iterator.scala:1336) ~[jaf-sdk-2.4.0.jar:?]
        at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:406) ~[spark-sql_2.11-2.2.0.jar:2.2.0]
        at com.ca.alarmupdates.alarmupdates.lambda$null$0(alarmupdates.java:85) ~[classes/:?]
        at java.util.Arrays$ArrayList.forEach(Arrays.java:3880) ~[?:1.8.0_161]
        at com.ca.alarmupdates.alarmupdates.lambda$main$f87f782d$1(alarmupdates.java:58) ~[classes/:?]
        at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272) ~[spark-streaming_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.streaming.api.java.JavaDStreamLike$$anonfun$foreachRDD$1.apply(JavaDStreamLike.scala:272) ~[spark-streaming_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628) ~[spark-streaming_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628) ~[spark-streaming_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51) ~[spark-streaming_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51) ~[spark-streaming_2.11-2.2.0.jar:2.2.0]
        at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mc
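
The MatchError comes out of CatalystTypeConverters while createDataFrame(Arrays.asList(alarmObj), Alarm.class) converts the bean: the bean-to-Row path in this Spark version appears not to handle a nested bean type such as List<AlarmUpdate>, so the inner AlarmUpdate objects fail to match any supported Catalyst type. One alternative often suggested for nested beans, shown here only as a sketch to verify on Spark 2.2.x rather than a confirmed fix, is to build a typed Dataset with an explicit bean encoder and then drop to a DataFrame:

    import java.util.Arrays;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    SparkSession spark = jobCtx.getSparkSession();   // jobCtx and alarmObj as in the code above
    Dataset<Alarm> typed = spark.createDataset(Arrays.asList(alarmObj), Encoders.bean(Alarm.class));
    Dataset<Row> results = typed.toDF();
    results.printSchema();

The answer below avoids the bean path entirely and lets Spark infer the schema by reading the JSON itself with spark.read().json():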
import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

static SparkSession spark = SparkSession.builder().master("local").appName("simple").getOrCreate();
static JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

// wholeTextFiles keeps the multi-line JSON array in a single record; map(t -> t._2()) drops the file path
Dataset<Row> df = spark.read().json(sc.wholeTextFiles("path to json file").map(t -> t._2()));
df.show(false);
+----------+---------------+--------+--------------------------------------------+-----+-----------+-----+-------+------+-----+
|@tenant_id|alarmUpdateTime|alarm_id|alarm_updates                               |aucID|inID       |index|product|source|state|
+----------+---------------+--------+--------------------------------------------+-----+-----------+-----+-------+------+-----+
|XYZ       |1526342400000  |AB5C9123|[[Severity,Minor,Major], [state,UPDATE,NEW]]|5af83|INC15234567|test |test   |ABS   |NEW  |
+----------+---------------+--------+--------------------------------------------+-----+-----------+-----+-------+------+-----+
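
Once the DataFrame is loaded, the nested alarm_updates array can be flattened into one row per update if needed. A sketch, assuming the df and inferred schema shown above:

    import static org.apache.spark.sql.functions.col;
    import static org.apache.spark.sql.functions.explode;

    Dataset<Row> updates = df
            .withColumn("update", explode(col("alarm_updates")))
            .select(col("alarm_id"),
                    col("update.alarmField"),
                    col("update.old_value"),
                    col("update.new_value"));
    updates.show(false);

Since the question notes that the JSON is not in a file, the same read also works from an in-memory string, as in the next snippet: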
    String t1Record = "[\n" +
            "  {\n" +
            "    \"@tenant_id\":\"XYZ\",\n" +
            "    \"alarmUpdateTime\":1526342400000,\n" +
            "    \"alarm_id\":\"AB5C9123\",\n" +
            "    \"alarm_updates\":[\n" +
            "      {\n" +
            "        \"alarmField\":\"Severity\",\n" +
            "        \"new_value\":\"Minor\",\n" +
            "        \"old_value\":\"Major\"\n" +
            "      },\n" +
            "      {\n" +
            "        \"alarmField\":\"state\",\n" +
            "        \"new_value\":\"UPDATE\",\n" +
            "        \"old_value\":\"NEW\"\n" +
            "      }\n" +
            "    ],\n" +
            "    \"aucID\":\"5af83\",\n" +
            "    \"inID\":\"INC15234567\",\n" +
            "    \"index\":\"test\",\n" +
            "    \"product\":\"test\",\n" +
            "    \"source\":\"ABS\",\n" +
            "    \"state\":\"NEW\"\n" +
            "  }\n" +
            "]";
    // parallelize the single JSON string and let Spark infer the nested schema
    JavaRDD<String> t1RecordRDD = sc.parallelize(Arrays.asList(t1Record));
    Dataset<Row> df = spark.read().json(t1RecordRDD);
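
To start from the populated Alarm bean rather than a hand-written string, one option is to serialize the bean to JSON first and read that string the same way. This sketch assumes Jackson is on the classpath and that the bean fields are annotated (for example @JsonProperty("@tenant_id")) so the produced JSON matches the structure above:

    import java.util.Arrays;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;

    ObjectMapper mapper = new ObjectMapper();
    // wrap the single bean in a list so the JSON is an array, like the T2 payload;
    // writeValueAsString throws JsonProcessingException, so handle or declare it
    String json = mapper.writeValueAsString(Arrays.asList(alarmObj));

    JavaRDD<String> jsonRDD = sc.parallelize(Arrays.asList(json));
    Dataset<Row> alarmDf = spark.read().json(jsonRDD);
    alarmDf.show(false);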