Java ApacheSpark任务出现空指针异常
在ApacheSpark rdd作业中,我的任务未完成,并给出空指针异常Java ApacheSpark任务出现空指针异常,java,scala,apache-spark,cloudera,databricks,Java,Scala,Apache Spark,Cloudera,Databricks,在ApacheSpark rdd作业中,我的任务未完成,并给出空指针异常 Lost task 22.3 in stage 8.0 (TID 19700, 10.64.109.70): java.lang.NullPointerException at com.google.common.base.Preconditions.checkNotNull(Preconditions.java:210) at com.google.common.base.Optional
Lost task 22.3 in stage 8.0 (TID 19700, 10.64.109.70): java.lang.NullPointerException
at com.google.common.base.Preconditions.checkNotNull(Preconditions.java:210)
at com.google.common.base.Optional.of(Optional.java:85)
at org.apache.spark.api.java.JavaUtils$.optionToOptional(JavaUtils.scala:30)
at org.apache.spark.api.java.JavaPairRDD$$anonfun$leftOuterJoin$2.apply(JavaPairRDD.scala:564)
at org.apache.spark.api.java.JavaPairRDD$$anonfun$leftOuterJoin$2.apply(JavaPairRDD.scala:564)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$mapValues$1$$anonfun$apply$43$$anonfun$apply$44.apply(PairRDDFunctions.scala:755)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$mapValues$1$$anonfun$apply$43$$anonfun$apply$44.apply(PairRDDFunctions.scala:755)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:389)
at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388)
at scala.collection.convert.Wrappers$IteratorWrapper.hasNext(Wrappers.scala:29)
at com.my.SparkRDDHelper$18.call(SparkRDDHelper.java:457)
at com.my.SparkRDDHelper$18.call(SparkRDDHelper.java:1)
at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$4$1.apply(JavaRDDLike.scala:159)
at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$4$1.apply(JavaRDDLike.scala:159)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
代码如下:
public static JavaRDD<SchemaLess> mapPartitionForDeletedCode(
JavaPairRDD<String, Tuple2<Long, Optional<Long>>> parJavaRdd, final int rDays) {
return parJavaRdd.mapPartitions(new FlatMapFunction<Iterator<Tuple2<String, Tuple2<Long, Optional<Long>>>>, SchemaLess>() {
private static final long serialVersionUID = 5501482349294705299L;
@Override
public Iterable<SchemaLess> call(Iterator<Tuple2<String, Tuple2<Long, Optional<Long>>>> parTuple) throws Exception {
List<SchemaLess> theSchemaLess = new ArrayList<SchemaLess>();
while(parTuple != null && parTuple.hasNext())
{
Tuple2<String, Tuple2<Long, Optional<Long>>> theTuple = parTuple.next();
SchemaLess theData;
theData = new SchemaLess();
theData.setKey(theTuple._1);
theData.setAcode(Constants.ACODE);
theData.setTtl(compareTtl(theTuple._2._2.get().intValue(), rDays));
theSchemaLess.add(theData);
}
return theSchemaLess;
}
});
}
公共静态JavaRDD mapPartitionForDeletedCode(
javapairdd parJavaRdd,最终整数日){
返回parJavaRdd.mapPartitions(新的FlatMapFunction(){
私有静态最终长serialVersionUID=5501482349294705299L;
@凌驾
公共Iterable调用(迭代器部分调用)引发异常{
列出无模式=新建ArrayList();
while(parTuple!=null&&parTuple.hasNext())
{
Tuple2 theTuple=parTuple.next();
无图式的数据;
数据=新的无模式();
DATA.setKey(双工1);
setAcode(Constants.ACODE);
data.setTtl(compareTtl(tuple.\u 2.\u 2.get().intValue(),rDays));
添加(数据);
}
返回无格式文件;
}
});
}
为guava-18.0.jar添加maven依赖项并重新构建和部署只需在未管理空值场景的地方检查spark代码即可。
public static JavaRDD<SchemaLess> mapPartitionForDeletedCode(
JavaPairRDD<String, Tuple2<Long, Optional<Long>>> parJavaRdd, final int rDays) {
return parJavaRdd.mapPartitions(new FlatMapFunction<Iterator<Tuple2<String, Tuple2<Long, Optional<Long>>>>, SchemaLess>() {
private static final long serialVersionUID = 5501482349294705299L;
@Override
public Iterable<SchemaLess> call(Iterator<Tuple2<String, Tuple2<Long, Optional<Long>>>> parTuple) throws Exception {
List<SchemaLess> theSchemaLess = new ArrayList<SchemaLess>();
while(parTuple != null && parTuple.hasNext())
{
Tuple2<String, Tuple2<Long, Optional<Long>>> theTuple = parTuple.next();
SchemaLess theData;
theData = new SchemaLess();
theData.setKey(theTuple._1);
theData.setAcode(Constants.ACODE);
theData.setTtl(compareTtl(theTuple._2._2.get().intValue(), rDays));
theSchemaLess.add(theData);
}
return theSchemaLess;
}
});
}