原因: java.lang.RuntimeException: java.lang.String 不是日期架构(schema of date)的有效外部类型
我正在尝试使用 Spark SQL 将一些 Java 对象数据插入 MySQL。下面是我的简化代码:
import java.io.Serializable;
import java.sql.Date;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Row model for the export-value records written to MySQL via Spark SQL.
 *
 * <p>Field order here must match {@code structType} exactly: Spark maps the
 * {@code Object[]} produced by {@link #getAllValues()} positionally onto the
 * schema fields.
 *
 * <p>NOTE(review): field names {@code Seasonal_adjust}/{@code Last_updated}
 * break Java camelCase convention, but renaming them would change the
 * Lombok-generated accessors that callers may rely on, so they are kept.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Model implements Serializable {
    private String state;
    private String title;
    private String exportCntry;
    private String filepath;
    private String unit;
    private String frequency;
    private String Seasonal_adjust;
    // java.sql.Date — maps onto Spark's DateType.
    private Date Last_updated;

    // Schema for rows built from this model; order and types must mirror the
    // fields above (8 fields, last one DateType).
    private static StructType structType = DataTypes.createStructType(new StructField[] {
            DataTypes.createStructField("state", DataTypes.StringType, false),
            DataTypes.createStructField("title", DataTypes.StringType, false),
            DataTypes.createStructField("exportCntry", DataTypes.StringType, false),
            DataTypes.createStructField("filepath", DataTypes.StringType, false),
            DataTypes.createStructField("unit", DataTypes.StringType, false),
            DataTypes.createStructField("frequency", DataTypes.StringType, false),
            DataTypes.createStructField("Seasonal_adjust", DataTypes.StringType, false),
            DataTypes.createStructField("Last_updated", DataTypes.DateType, false)
    });

    public static StructType getStructType() {
        return structType;
    }

    public static void setStructType(StructType structType) {
        Model.structType = structType;
    }

    /**
     * Returns the field values in schema order, for {@code RowFactory.create}.
     *
     * <p>BUG FIX: the original returned 10 values — {@code unit} and
     * {@code frequency} were listed twice — so the 8th slot (schema field
     * {@code Last_updated}, DateType) received the String {@code frequency},
     * causing "java.lang.String is not a valid external type for schema of
     * date". The array must contain exactly the 8 fields, in schema order.
     */
    public Object[] getAllValues() {
        return new Object[] { state, title, exportCntry, filepath, unit, frequency,
                Seasonal_adjust, Last_updated };
    }
}
下面的代码是 Spark SQL 代码,它将上述 Model Java 对象插入 MySQL。
这个异常似乎与 java.sql.Date 对象的 API 有关:Spark SQL 的 DataFrame.write 方法不起作用。我是否遗漏了某个步骤?
import java.sql.Date;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.Queue;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
/**
 * Demo driver: streams CSV-like test lines through Spark Streaming, converts
 * each line to a {@link Model}, and appends the resulting DataFrame rows to a
 * MySQL table over JDBC.
 *
 * <p>Each input line has 8 comma-separated fields; the last one is a date in
 * {@code yyyy-[m]m-[d]d} form (required by {@link Date#valueOf(String)}).
 */
public class Processor {

    private static List<String> testData = Arrays.asList(
            "Alabama,ValueofExports,Israel,A/L/I/ALISRA052SCEN.csv,$,A,NSA,2018-06-26",
            "Alaska,ValueofExports,Israel,A/K/AKISRA052SCEN.csv,$,A,NSA,2018-06-26",
            "Arizona,ValueofExports,Israel,A/Z/I/AZISRA052SCEN.csv,$,A,NSA,2018-06-26",
            "Arkansas,ValueofExports,Israel,A/R/I/ARISRA052SCEN.csv,$,A,NSA,2018-06-26",
            "California,ValueofExports,Israel,C/A/I/CAISRA052SCEN.csv,$,A,NSA,2018-06-26",
            "Colorado,ValueofExports,Israel,C/O/I/COISRA052SCEN.csv,$,A,NSA,2018-06-26",
            "Connecticut,ValueofExports,Israel,C/T/I/CTISRA052SCEN.csv,$,A,NSA,2018-06-26",
            "Delaware,ValueofExports,Israel,D/E/I/DEISRA052SCEN.csv,$,A,NSA,2018-06-26"
    );

    public static void main(String[] args) throws Exception {
        // SECURITY NOTE(review): credentials are hard-coded for this demo;
        // load them from configuration/environment in real code.
        Properties jdbcProps = new Properties();
        jdbcProps.put("user", "root");
        jdbcProps.put("password", "password");

        SparkSession spark = SparkSession.builder().master("local[*]").appName("Spark Kafka Test").getOrCreate();
        JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate());
        JavaStreamingContext jssc = new JavaStreamingContext(jsc, Durations.milliseconds(1000));

        // Feed the static test data in as a one-shot queue stream.
        JavaRDD<String> rddTest = jsc.parallelize(testData);
        Queue<JavaRDD<String>> queue = new LinkedList<JavaRDD<String>>();
        queue.add(rddTest);
        JavaDStream<String> jds = jssc.queueStream(queue);

        // Parse each CSV line into a Model. FIX: the original split the line
        // twice (allocated an array from split() and immediately overwrote it);
        // split once and reuse the result.
        JavaDStream<Model> jdm = jds.map(str -> {
            String[] fields = str.split(",");
            Date date = Date.valueOf(fields[7]); // expects yyyy-MM-dd
            return new Model(fields[0], fields[1], fields[2], fields[3],
                    fields[4], fields[5], fields[6], date);
        });

        // Convert Models to Rows (positional — must match Model.getStructType())
        // and append each micro-batch to MySQL via JDBC.
        jdm.map(e -> RowFactory.create(e.getAllValues())).foreachRDD(rdd -> {
            Dataset<Row> df = spark.createDataFrame(rdd, Model.getStructType());
            df.printSchema();
            df.write()
                    .mode(SaveMode.Append)
                    .jdbc("jdbc:mysql://localhost:3306/test?characterEncoding=utf8&serverTimezone=Asia/Seoul",
                            "test", jdbcProps);
        });

        jssc.start();
        jssc.awaitTermination(); // blocks until the streaming context stops
        jssc.close();
    }
}
Caused by: java.lang.RuntimeException: java.lang.String is not a valid external type for schema of date
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.StaticInvoke_7$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source)
at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:289)