Apache spark 在命令行上发出spark submit完成任务,但从不返回提示

Apache spark 在命令行上发出spark-submit完成任务,但从不返回提示,apache-spark,Apache Spark,我正在使用spark(Java)读写数据库。我正在使用Spark的内置群集管理器。应用程序作为fat jar打包,并通过spark-submit命令运行: "./spark-submit --class com.tte.site.sector.daily.main.Driver --master spark://ip-xxx-xx-xx-xx:7077 --deploy-mode client /home/ec2-user/jars-dir/site-sector-daily-1.0-jar-with-dependencies.jar

我正在使用spark(Java)读写数据库。我正在使用Spark的内置群集管理器。应用程序作为fat jar打包,并通过spark submit命令运行:

"./spark-submit --class com.tte.site.sector.daily.main.Driver --master spark://ip-xxx-xx-xx-xx:7077 --deploy-mode client /home/ec2-user/jars-dir/site-sector-daily-1.0-jar-with-dependencies.jar
任务运行正常,应用程序日志中没有异常,或者在命令行上发出spark submit时也没有异常。以下是运行spark submit时的全部标准输出:

./spark-submit --class com.tte.site.sector.daily.main.Driver --master spark://ip-xxx-xx-xx-xx:7077 --deploy-mode client /home/ec2-user/jars-dir/site-sector-daily-1.0-jar-with-dependencies.jar

2017年12月15日06:42:06信息任务集经理:于1962年8月30日(1/2)在861449毫秒内完成了0.0阶段(TID 0)中的任务0.0 2017年12月15日06:46:01信息调度程序:结果阶段0(foreach位于Driver.java:143)在1095.509秒内完成 2017年12月15日06:46:01信息任务经理:在xxx.xx.17.222(2/2)的1095438毫秒内完成了0.0阶段(TID 1)中的任务1.0 2017年12月15日06:46:01信息TaskSchedulerImpl:已从池中删除任务集0.0,其任务已全部完成 2017年12月15日06:46:01信息调度程序:作业0已完成:foreach位于驱动程序处。java:143,耗时1095.7685>15秒

就我所知,应用程序逻辑似乎已经完成,因为已经正确执行了相应的数据库更新,但Spark UI显示它仍在运行,当然spark-submit命令不会返回提示符,因此某些资源仍然存在。我尝试过使用JavaSparkContext.close(),虽然它有助于Spark UI显示作业已完成,但spark-submit命令仍然不会返回。我做错了什么?

代码如下:

        public class Driver {

            private static final Logger logger = LoggerFactory.getLogger(Driver.class);

            public static void main(String[] args) {

                Cluster cluster = Cluster.builder().addContactPoint("xxx.xx.xx.xx").build();// aws local
                Session dbSession = cluster.connect("syringa");
                SparkConf conf = new SparkConf()
                        .setAppName("sector_site_hourly_daily_job")
                        .setMaster("spark://ip-172-31-29-81:7077");
                JavaSparkContext sc = new JavaSparkContext(conf);


                    // share with workers
                    final Broadcast<List<KpiEntity>> kpiFormulaEntityBroadcastVar = sc.broadcast(kpiFormulaEntity);
                    final Broadcast<Set<String>> inputCountersBroadcastVar = sc.broadcast(inputCountersInKPIFormulas);
                    final Broadcast<Map<Integer, List<Date>>> fromTohoursBroadcastVar = sc.broadcast(generateFromToHour2());
                    final Broadcast<Map<Integer, List<DateTime>>> fromTohoursSelectBroadcastVar = sc.broadcast(generateFromToHours());
                    final Broadcast<Map<Integer, List<DateTime>>> noneUTCDatesBroadcastVar = sc.broadcast(generateFromToHoursForSelecting());

                    // Alternate approach to cassandraTable - Sites-Sectors RDD
                    ResultSet siteSectorQueryResult = dbSession.execute("select * from kpi.site_sectors_zone_area"); 
                    List<Row> rows = siteSectorQueryResult.all();
                    List<SiteSectorsEntity> siteSectorsEntities = new ArrayList<>();
                    for(Row row: rows) {
                        SiteSectorsEntity siteSectorsEntity = new SiteSectorsEntity();
                        siteSectorsEntity.setSiteName(row.getString("site_name"));
                        siteSectorsEntity.setArea(row.getString("area"));
                        siteSectorsEntity.setLatitude(row.getString("latitude"));
                        siteSectorsEntity.setLongitude(row.getString("longitude"));
                        siteSectorsEntity.setSectorAzimuth(row.getMap("sector_azimuth", String.class, String.class));
                        siteSectorsEntity.setSectors(row.getList("sectors", String.class));
                        siteSectorsEntity.setZone(row.getString("zone"));
                        siteSectorsEntities.add(siteSectorsEntity);
                    }
                    logger.info("*** Number of SiteSectorEntities {}", siteSectorsEntities.size());

                    JavaRDD<SiteSectorsEntity> siteSectorsEntityRDD = sc.parallelize(siteSectorsEntities);


                    doWork(siteSectorsEntityRDD, kpiFormulaEntityBroadcastVar, inputCountersBroadcastVar, 
                            fromTohoursBroadcastVar, fromTohoursSelectBroadcastVar, noneUTCDatesBroadcastVar);
                } finally {
                     dbSession.close();
                }
            }


            public static void doWork(JavaRDD<SiteSectorsEntity> siteSectorsEntityRDD, Broadcast<List<KpiEntity>> kpiFormulaEntityBroadcastVar,
                    Broadcast<Set<String>> inputCountersBroadcastVar, Broadcast<Map<Integer, List<Date>>> fromTohoursBroadcastVar,
                    Broadcast<Map<Integer, List<DateTime>>> fromTohoursSelectBroadcastVar,
                    Broadcast<Map<Integer, List<DateTime>>> noneUTCBroadvaseVar)  {

                // Distribute to workers
                siteSectorsEntityRDD.foreach(new VoidFunction<SiteSectorsEntity>() {
                    private static final long serialVersionUID = 5219326359281542043L;

                    public void call(SiteSectorsEntity siteSectorsEntity) throws Exception { // remove throws
                        Cluster cluster = Cluster.builder().addContactPoint("xxx.xx.xx.xx").build();//aws local
                        Session dbSession = cluster.connect("syringa");
                        Map<Integer, List<Date>> fromTohours = fromTohoursBroadcastVar.value();
                        Map<Integer, List<DateTime>> fromTohoursSelect = fromTohoursSelectBroadcastVar.value();
                        Map<Integer, List<DateTime>> noneUTCDates = noneUTCBroadvaseVar.value();
                        DateTime now = new DateTime();

                        //omitting long line of code here...

                        logger.info("site-daily-counter-aggregation: compeleted for: {}", siteName);

                        logger.info("site_daily_kpi: starting daily kpi generation for {}", siteName);
                        String siteDailyAggregateValue;
                        Map<String, String> mapOfSiteToDailyKpiInputCountersTotal = new HashMap<>();
                        for(KpiEntity kpiEntity: kpiEntityList) {
                            for(String kpiInputCounter : kpiEntity.getFormulaCounterNames()) {
                                String Zfrom = fromTimestamp.toString();
                                String[] noZfrom = Zfrom.split("Z");
                                String from = noZfrom[0]+"-0800";
                                String Zto = toTimestamp.toString();
                                String[] noZto = Zto.split("Z");
                                String to  = noZto[0]+"-0800";
                                List<Row> siteDailyAggregate = 
                                        DBUtil.selectSiteDailyCounterAggregate(siteName, from, to, kpiInputCounter, dbSession);
                                siteDailyAggregateValue = siteDailyAggregate.get(0).getString("counter_agg_value");
                                mapOfSiteToDailyKpiInputCountersTotal.put(kpiInputCounter, siteDailyAggregateValue);
                            }
                            String kpiFormula = kpiEntity.getKpiFormula();
                            for(String counter:kpiEntity.getFormulaCounterNames()) {
                                kpiFormula = kpiFormula.replaceAll("\\b"+counter+"\\b", mapOfSectorsToKpiInputCountersTotal.get(counter));
                            }
                            System.out.println("site_daily_kpi: KPI FORMULA TO BE EVAL'd :: "+kpiFormula +" for hour::" +fromTimestamp);
                            // create a script engine manager
                            ScriptEngineManager factory = new ScriptEngineManager();
                            // create a Nashorn script engine
                            ScriptEngine engine = factory.getEngineByName("nashorn");
                            // evaluate KPI formula as a JavaScript statement
                            try {
                                String red = kpiEntity.getKpiStatusRed();
                                String green = kpiEntity.getKpiStatusGreen();
                                String yellow = kpiEntity.getKpiStatusYellow();
                                Map<String, String> thresholdMap = new HashMap<>();
                                thresholdMap.put("red", red);
                                thresholdMap.put("yellow", yellow);
                                thresholdMap.put("green", green);
                                String[] yellowRange = yellow.split("-"); // assuming that only yellow carries a multi-value (range)

                                BigDecimal dailyKpiValue = evaluateExpression(kpiFormula, engine);
                                if(compareIfLessThanOneAndGreaterThanZero(dailyKpiValue)) {
                                    dailyKpiValue = dailyKpiValue.setScale(1, RoundingMode.UP);
                                } else {
                                    dailyKpiValue = dailyKpiValue.setScale(0, RoundingMode.DOWN);
                                }
                                System.out.println("site_hourly_kpi: site Hourly "+kpiEntity.getKpiName()+"="+dailyKpiValue.setScale(0, RoundingMode.DOWN) +" "+kpiEntity.getMeasurementUnit());

                                String kpiStatusColor = determineKpiStatusColor(dailyKpiValue, red, green, yellowRange, engine);

                                // populate sector counter aggregation table for a counter
                                Insert insert = QueryBuilder.insertInto("kpi", "site_daily_kpi")
                                        .value("site_name", siteName)
                                        .value("area", siteSectorsEntity.getArea())
                                        .value("id", UUID.randomUUID())
                                        .value("kpi_name", kpiEntity.getKpiName())
                                        .value("kpi_status", kpiStatusColor)
                                        .value("kpi_value", dailyKpiValue.toString())
                                        .value("measurement", kpiEntity.getMeasurementUnit())
                                        .value("thresholds", thresholdMap)
                                        .value("time_stamp", isoFormat.parse(fromTimestamp.toString()))
                                        .value("category", kpiEntity.getCategory())
                                        .value("zone", siteSectorsEntity.getZone())
                                        ;
                                ResultSet results = dbSession.execute(insert);

                                 } catch (NumberFormatException nfe) {
                                     logger.info("site_daily_kpi: site hourly "+kpiEntity.getKpiName()+"="+0+" "+kpiEntity.getMeasurementUnit());
                                 } catch (ClassCastException cce) {
                                     logger.info("site_daily_kpi: ClassCastException site hourly: "+ siteName );
                                 }
                    }// ends site hourly kpi gen.
                    logger.info("site_daily_kpi: completed site daily kpi genertion for: {}",siteName);

            } finally {
                try {
                    logger.info("in finally - closing DB session.");
                    dbSession.close();
                } catch (Exception e) {
                    logger.error("Error during db session close", e.getMessage());
                }
            }
            }
         });
        }
        }
公共类驱动程序{
私有静态最终记录器Logger=LoggerFactory.getLogger(Driver.class);
公共静态void main(字符串[]args){
Cluster Cluster=Cluster.builder().addContactPoint(“xxx.xx.xx.xx”).build();//aws local
会话dbSession=cluster.connect(“syringa”);
SparkConf conf=新的SparkConf()
.setAppName(“部门、现场、小时、日工作”)
.setMaster(“spark://ip-172-31-29-81:7077");
JavaSparkContext sc=新的JavaSparkContext(conf);
//与工人分享
最终广播kfiformulaentitybroadcastvar=sc.Broadcast(kfiformulaentity);
最终广播inputCountersBroadcastVar=sc.Broadcast(InputCountersinkPi公式);
最终广播fromTohoursBroadcastVar=sc.Broadcast(generateFromToHour2());
最终广播FromToHours选择BroadcastVar=sc.Broadcast(generateFromToHours());
最终广播noneUTCDatesBroadcastVar=sc.Broadcast(generateFromToHoursForSelecting());
//cassandraTable站点的替代方法-区域RDD
ResultSet siteSectorQueryResult=dbSession.execute(“从kpi.site\u Sector\u zone\u area中选择*);
列表行=siteSectorQueryResult.all();
List siteSectorsEntities=new ArrayList();
用于(行:行){
SiteSectorsEntity SiteSectorsEntity=新建SiteSectorsEntity();
siteSectorsEntity.setSiteName(row.getString(“站点名称”);
siteSectorsEntity.setArea(row.getString(“area”);
siteSectorsEntity.setLatitude(row.getString(“纬度”));
siteSectorsEntity.setLongitude(row.getString(“经度”);
siteSectorsEntity.setSectorAximition(row.getMap(“扇区方位角”,String.class,String.class));
siteSectorsEntity.setSectors(row.getList(“sectors”,String.class));
siteSectorsEntity.setZone(row.getString(“zone”);
添加(siteSectorsEntity);
}
logger.info(“***SiteSectorEntities的数量{}”,SiteSectorEntities.size());
JavaRDD siteSectorsEntityRDD=sc.parallelize(siteSectorsEntities);
doWork(siteSectorsEntityRDD、kpiFormulaEntityBroadcastVar、inputCountersBroadcastVar、,
fromTohoursBroadcastVar、fromTohoursSelectBroadcastVar、noneUTCDatesBroadcastVar);
}最后{
dbSession.close();
}
}
公共静态无效工作(JavaRDD siteSectorsEntityRDD、广播kpiFormulaEntityBroadcastVar、,
广播输入计数器BroadcastVar,广播时间为ToHoursBroadcastVar,
从至小时广播选择BroadcastVar,
广播非外载(VASEVAR){
//分发给工人
siteSectorsEntityRDD.foreach(新的VoidFunction(){
私有静态最终长serialVersionUID=5219326359281542043L;
公共无效调用(SiteSectorsEntity SiteSectorsEntity)引发异常{//remove引发
Cluster Cluster=Cluster.builder().addContactPoint(“xxx.xx.xx.xx”).build();//aws local
会话dbSession=cluster.connect(“syringa”);
映射fromTohours=fromTohoursBroadcastVar.value();
映射fromTohoursSelect=fromTohoursSelectBroadcastVar.value();
Map noneUTCDates=noneUTCBroadvaseVar.value();
DateTime now=新的DateTime();
//省略这里的长代码行。。。
info(“站点每日计数器聚合:已完成:{}”,站点名称);
info(“site_daily_kpi:starting daily kpi generation for{}”,siteName);
字符串siteDailyAggregateValue;
站点地图DailykPiInputCountersTotal
   jstack 14073
    2015-12-18 06:30:46
    Full thread dump Java HotSpot(TM) 64-Bit Server VM (25.65-b01   mixed mode):

    "ForkJoinPool-3-worker-3" #73 daemon prio=5 os_prio=0 tid=0x00007f59801c6000 nid=0x37fd waiting on condition [0x00007f59741f8000]
       java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x00000000cead31b8> (a scala.concurrent.forkjoin.ForkJoinPool)
        at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:2075)
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)

    "DestroyJavaVM" #72 prio=5 os_prio=0 tid=0x00007f59a8008800 nid=0x3718 waiting on condition [0x0000000000000000]
       java.lang.Thread.State: RUNNABLE

    "Attach Listener" #69 daemon prio=9 os_prio=0 tid=0x00007f5980220800 nid=0x376e waiting on condition [0x0000000000000000]
       java.lang.Thread.State: RUNNABLE

    "cluster1-nio-worker-1" #10 prio=5 os_prio=0 tid=0x00007f59a85a9000 nid=0x3727 runnable [0x00007f5984625000]
       java.lang.Thread.State: RUNNABLE
        at io.netty.channel.epoll.Native.epollWait0(Native Method)
        at io.netty.channel.epoll.Native.epollWait(Native.java:153)
        at io.netty.channel.epoll.EpollEventLoop.epollWait(EpollEventLoop.java:184)
        at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:209)
        at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
        at java.lang.Thread.run(Thread.java:745)

    "threadDeathWatcher-2-1" #15 daemon prio=1 os_prio=0 tid=0x00007f59780ca000 nid=0x3726 waiting on condition [0x00007f5984926000]
       java.lang.Thread.State: TIMED_WAITING (sleeping)
        at java.lang.Thread.sleep(Native Method)
        at io.netty.util.ThreadDeathWatcher$Watcher.run(ThreadDeathWatcher.java:137)
        at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
        at java.lang.Thread.run(Thread.java:745)

    "cluster1-timeouter-0" #11 prio=5 os_prio=0 tid=0x00007f597806b800 nid=0x3725 waiting on condition [0x00007f5984a27000]
       java.lang.Thread.State: TIMED_WAITING (sleeping)
        at java.lang.Thread.sleep(Native Method)
        at io.netty.util.HashedWheelTimer$Worker.waitForNextTick(HashedWheelTimer.java:461)
        at io.netty.util.HashedWheelTimer$Worker.run(HashedWheelTimer.java:360)
        at java.lang.Thread.run(Thread.java:745)

    "cluster1-nio-worker-0" #9 prio=5 os_prio=0 tid=0x00007f59a8584000 nid=0x3724 runnable [0x00007f5984b28000]
       java.lang.Thread.State: RUNNABLE
        at io.netty.channel.epoll.Native.epollWait0(Native Method)
        at io.netty.channel.epoll.Native.epollWait(Native.java:153)
        at io.netty.channel.epoll.EpollEventLoop.epollWait(EpollEventLoop.java:184)
        at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:209)
        at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
        at java.lang.Thread.run(Thread.java:745)

    "cluster1-scheduled-task-worker-0" #13 prio=5 os_prio=0 tid=0x00007f59a854e800 nid=0x3722 waiting on condition [0x00007f59ac103000]
       java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x00000000d59c70b8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
        at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
        at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
        at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
        at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

    "cluster1-connection-reaper-0" #8 prio=5 os_prio=0 tid=0x00007f59a8483800 nid=0x3721 waiting on condition [0x00007f59ac408000]
       java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x00000000d59c8f20> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
        at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
        at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
        at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
        at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

    "Service Thread" #7 daemon prio=9 os_prio=0 tid=0x00007f59a80bc800 nid=0x371f runnable [0x0000000000000000]
       java.lang.Thread.State: RUNNABLE

    "C1 CompilerThread1" #6 daemon prio=9 os_prio=0 tid=0x00007f59a80af800 nid=0x371e waiting on condition [0x0000000000000000]
       java.lang.Thread.State: RUNNABLE

    "C2 CompilerThread0" #5 daemon prio=9 os_prio=0 tid=0x00007f59a80ad800 nid=0x371d waiting on condition [0x0000000000000000]
       java.lang.Thread.State: RUNNABLE

    "Signal Dispatcher" #4 daemon prio=9 os_prio=0 tid=0x00007f59a80ac000 nid=0x371c runnable [0x0000000000000000]
       java.lang.Thread.State: RUNNABLE

    "Finalizer" #3 daemon prio=8 os_prio=0 tid=0x00007f59a8074000 nid=0x371b in Object.wait() [0x00007f59ace39000]
       java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        - waiting on <0x00000000d5556950> (a java.lang.ref.ReferenceQueue$Lock)
        at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143)
        - locked <0x00000000d5556950> (a java.lang.ref.ReferenceQueue$Lock)
        at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164)
        at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:209)

    "Reference Handler" #2 daemon prio=10 os_prio=0 tid=0x00007f59a8072000 nid=0x371a in Object.wait() [0x00007f59acf3a000]
       java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        - waiting on <0x00000000d55563d0> (a java.lang.ref.Reference$Lock)
        at java.lang.Object.wait(Object.java:502)
        at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:157)
        - locked <0x00000000d55563d0> (a java.lang.ref.Reference$Lock)

    "VM Thread" os_prio=0 tid=0x00007f59a806d000 nid=0x3719 runnable 

    "VM Periodic Task Thread" os_prio=0 tid=0x00007f59a80c0000 nid=0x3720 waiting on condition 

    JNI global references: 278