
Java: Spark application does not stop when multiple threads share the same SparkContext

Tags: java, apache-spark

I am trying to reproduce a problem I am facing. My problem statement: a folder contains multiple files. I need to run a word count on each file and print the results. Each file should be processed in parallel (with a cap on the parallelism, of course). I have written the code below to do this, and it runs fine. The cluster runs MapR's Spark distribution and has spark.scheduler.mode=FIFO.

Q1: Is there a better way to accomplish the task above?

Q2: I have observed that the application does not stop even after the word count has completed for all available files. I cannot figure out how to deal with this.
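A note on the spark.scheduler.mode=FIFO setting mentioned above: jobs submitted concurrently from several threads into one SparkContext are scheduled first-come-first-served under FIFO, so later per-file jobs may wait on earlier ones. A minimal sketch of enabling the FAIR scheduler instead (an assumption, not part of the original post; the cluster config must allow it):

// Sketch: FAIR scheduling lets jobs submitted from different threads
// share cluster resources instead of queuing strictly FIFO.
SparkConf sparkConf = new SparkConf()
        .setMaster("yarn-client")
        .setAppName("Tracker")
        .set("spark.scheduler.mode", "FAIR"); // default is FIFO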

package groupId.artifactId;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class Executor {

    /**
     * @param args
     */
    public static void main(String[] args) {
        final int threadPoolSize = 5;
        SparkConf sparkConf = new SparkConf().setMaster("yarn-client").setAppName("Tracker").set("spark.ui.port", "0");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        ExecutorService executor = Executors.newFixedThreadPool(threadPoolSize);
        List<Future> listOfFuture = new ArrayList<Future>();
        for (int i = 0; i < 20; i++) {
            if (listOfFuture.size() < threadPoolSize) {
                // Submit one word-count task per file; every task shares the same SparkContext.
                FlexiWordCount flexiWordCount = new FlexiWordCount(jsc, i);
                Future future = executor.submit(flexiWordCount);
                listOfFuture.add(future);
            } else {
                // Pool is full: poll until all submitted tasks finish, then print and reuse the list.
                boolean allFutureDone = false;
                while (!allFutureDone) {
                    allFutureDone = checkForAllFuture(listOfFuture);
                    System.out.println("Threads not completed yet!");
                    try {
                        Thread.sleep(2000); // wait 2 seconds before the next check
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                }
                printFutureResult(listOfFuture);
                System.out.println("printing of future done");
                listOfFuture.clear();
                System.out.println("future list got cleared");
            }
        }
        try {
            executor.awaitTermination(5, TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    // checkForAllFuture(...) and printFutureResult(...) are helper methods not shown in the post.
}
package groupId.artifactId;

import java.io.Serializable;
import java.util.Arrays;
import java.util.concurrent.Callable;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

public class FlexiWordCount implements Callable<Object>, Serializable {

    private static final long serialVersionUID = 1L;
    private JavaSparkContext jsc;
    private int fileId;

    public FlexiWordCount(JavaSparkContext jsc, int fileId) {
        super();
        this.jsc = jsc;
        this.fileId = fileId;
    }

    // Sums two partial counts for the same word.
    private static class Reduction implements Function2<Integer, Integer, Integer> {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    }

    // Maps each word to a (word, 1) pair.
    private static class KVPair implements PairFunction<String, String, Integer> {
        @Override
        public Tuple2<String, Integer> call(String paramT) throws Exception {
            return new Tuple2<String, Integer>(paramT, 1);
        }
    }

    // Splits each line into words.
    private static class Flatter implements FlatMapFunction<String, String> {
        @Override
        public Iterable<String> call(String s) {
            return Arrays.asList(s.split(" "));
        }
    }

    @Override
    public Object call() throws Exception {
        JavaRDD<String> jrd = jsc.textFile("/root/folder/experiment979/" + fileId + ".txt");
        System.out.println("inside call() for fileId = " + fileId);
        JavaRDD<String> words = jrd.flatMap(new Flatter());
        JavaPairRDD<String, Integer> ones = words.mapToPair(new KVPair());
        JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Reduction());
        return counts.collect();
    }
}
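Regarding Q1, one commonly suggested alternative (a sketch under stated assumptions, not from the original post) is to drop the thread pool entirely: jsc.wholeTextFiles(...) reads the whole folder in a single job as (filePath, fileContent) pairs, so the counts can be keyed per file and Spark handles the parallelism across files itself.

// Sketch only; also requires import org.apache.spark.api.java.function.PairFlatMapFunction;
JavaPairRDD<String, String> files = jsc.wholeTextFiles("/root/folder/experiment979/");
JavaPairRDD<String, Integer> perFileCounts = files
    .flatMapToPair(new PairFlatMapFunction<Tuple2<String, String>, String, Integer>() {
        @Override
        public Iterable<Tuple2<String, Integer>> call(Tuple2<String, String> file) {
            List<Tuple2<String, Integer>> pairs = new ArrayList<Tuple2<String, Integer>>();
            for (String word : file._2().split(" ")) {
                // Key on "filePath|word" so each file keeps its own counts.
                pairs.add(new Tuple2<String, Integer>(file._1() + "|" + word, 1));
            }
            return pairs;
        }
    })
    .reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });
System.out.println(perFileCounts.collect());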

Why doesn't the program shut down on its own?

Answer: You haven't stopped the SparkContext; try changing the main method to:

public static void main(String[] args) {    
    final int threadPoolSize = 5;       
    SparkConf sparkConf = new SparkConf().setMaster("yarn-client").setAppName("Tracker").set("spark.ui.port","0");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf); 
    ExecutorService executor = Executors.newFixedThreadPool(threadPoolSize);
    List<Future> listOfFuture = new ArrayList<Future>();
    for (int i = 0; i < 20; i++) {
        if (listOfFuture.size() < threadPoolSize) {
            FlexiWordCount flexiWordCount = new FlexiWordCount(jsc, i);
            Future future = executor.submit(flexiWordCount);
            listOfFuture.add(future);               
        } else {
            boolean allFutureDone = false;
            while (!allFutureDone) {
                allFutureDone = checkForAllFuture(listOfFuture);
                System.out.println("Threads not completed yet!");
                try {
                    Thread.sleep(2000);//waiting for 2 sec, before next check
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
            printFutureResult(listOfFuture);
            System.out.println("printing of future done");
            listOfFuture.clear();
            System.out.println("future list got cleared");
        }

    }
    try {
        executor.shutdown(); // stop accepting tasks so awaitTermination can return once work finishes
        executor.awaitTermination(5, TimeUnit.MINUTES);
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
    jsc.stop(); // stop the shared SparkContext so the application can exit
}
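The helpers checkForAllFuture and printFutureResult are never shown in the post; a minimal sketch of what they presumably do, reconstructed from their call sites (names and behavior assumed):

// Hypothetical helpers, not shown in the original post.
private static boolean checkForAllFuture(List<Future> futures) {
    // True only once every submitted task has finished.
    for (Future f : futures) {
        if (!f.isDone()) {
            return false;
        }
    }
    return true;
}

private static void printFutureResult(List<Future> futures) {
    // Prints each task's result (the collected word counts).
    for (Future f : futures) {
        try {
            System.out.println(f.get());
        } catch (InterruptedException | ExecutionException e) {
            e.printStackTrace();
        }
    }
}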