
Parallel Java Spark Enumeration


I ran the following code in Java Spark:

ZipFile zipFile = new ZipFile(zipFilePath);
Enumeration<? extends ZipEntry> entries = zipFile.entries();
while (entries.hasMoreElements()) {
    ZipEntry entry = entries.nextElement();
    // my logic...
}

How can the logic for each entry in the enumeration be processed concurrently, in Java and in Scala respectively?

In Java:

// enumeration is the Enumeration<? extends ZipEntry> obtained from zipFile.entries()
List<? extends ZipEntry> entriesList = Collections.list(enumeration);
List<CompletableFuture<ZipEntry>> futureList = entriesList.stream()
        .map(x -> CompletableFuture.supplyAsync(() -> {
            // logic
            return x;
        }))
        .collect(Collectors.toList());
CompletableFuture.allOf(futureList.toArray(new CompletableFuture[0])).join();

Hope this helps.
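
For reference, the snippet above can be expanded into a complete program as follows. This is only a sketch: the class name ZipEntryFutures, the processEntry helper, and the bounded thread pool are illustrative assumptions, not part of the original answer.

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class ZipEntryFutures {
    public static void main(String[] args) throws IOException {
        ZipFile zipFile = new ZipFile(args[0]);
        // Bounded pool so a large archive does not flood the common ForkJoinPool
        ExecutorService pool = Executors.newFixedThreadPool(
                Runtime.getRuntime().availableProcessors());
        try {
            // Materialize the Enumeration so it can be streamed
            List<? extends ZipEntry> entries = Collections.list(zipFile.entries());
            List<CompletableFuture<String>> futures = entries.stream()
                    .map(entry -> CompletableFuture.supplyAsync(
                            () -> processEntry(entry), pool))
                    .collect(Collectors.toList());
            // Block until every entry has been processed
            CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
        } finally {
            pool.shutdown();
            zipFile.close();
        }
    }

    // Hypothetical per-entry logic; replace with your own
    private static String processEntry(ZipEntry entry) {
        return entry.getName() + " : " + entry.getSize();
    }
}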

The following code will process the logic for each element concurrently, in Java with Spark:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

public class ParallelEnumeration {
    public static void main(String[] args) {
        String zipFilePath = "/ZipDir/";
        File zipFiles = new File(zipFilePath);
        final List<File> files = Arrays.asList(Objects.requireNonNull(zipFiles.listFiles()));
        // configure spark
        SparkConf sparkConf = new SparkConf().setAppName("Print Elements of RDD")
                .setMaster("local[*]");
        // start a spark context
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);

        // parallelize the file collection to two partitions
        jsc.parallelize(files, 2)
                .filter(file -> { // This filter is optional if the directory contains only zip files
                    // https://stackoverflow.com/questions/33934178/how-to-identify-a-zip-file-in-java
                    try (DataInputStream in = new DataInputStream(
                            new BufferedInputStream(new FileInputStream(file)))) {
                        // zip archives start with the local-file-header signature PK\003\004
                        return in.readInt() == 0x504b0304;
                    }
                }).foreach((VoidFunction<File>) file -> System.out.println(file.getName()));

        jsc.stop();
    }
}
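
If the per-entry logic should run inside the Spark job itself, the foreach body can open each archive and walk its entries, combining the two answers above. A sketch, assuming the same /ZipDir/ layout; the class name ParallelZipEntries and the println standing in for real logic are illustrative:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

import java.io.File;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.Objects;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class ParallelZipEntries {
    public static void main(String[] args) {
        File dir = new File("/ZipDir/");
        List<File> files = Arrays.asList(Objects.requireNonNull(dir.listFiles()));

        SparkConf conf = new SparkConf().setAppName("Parallel Zip Entries").setMaster("local[*]");
        // JavaSparkContext is Closeable, so try-with-resources shuts it down cleanly
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            jsc.parallelize(files, 2).foreach(file -> {
                // Each partition opens its own ZipFile and iterates the entries
                try (ZipFile zip = new ZipFile(file)) {
                    Enumeration<? extends ZipEntry> entries = zip.entries();
                    while (entries.hasMoreElements()) {
                        ZipEntry entry = entries.nextElement();
                        // per-entry logic goes here
                        System.out.println(file.getName() + " -> " + entry.getName());
                    }
                }
            });
        }
    }
}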

What should I write after entriesList.stream()? entriesList.stream().allMatch?

No, you don't need allMatch at all. Just replace // logic with whatever logic you want.

I am just trying to convert a list of ZipEntry into a list of CompletableFuture.
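
If the goal is a list of results rather than side effects only, the futures can be joined back into plain values once allOf has completed. A small sketch, reusing futureList from the answer above:

// Every future is already complete after CompletableFuture.allOf(...).join(),
// so join() here returns immediately with each computed value
List<ZipEntry> results = futureList.stream()
        .map(CompletableFuture::join)
        .collect(Collectors.toList());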