Java 8 使用Java8流对文本文件进行分块

Java 8 使用Java8流对文本文件进行分块,java-8,java-stream,Java 8,Java Stream,我试图将一个文本文件(比方说,一个日志文件)分块,以便一次只选择一定数量的行进行处理(比方说,我们将日志文件拆分为较小的行)。我以命令式风格编写了这段代码: package utils; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.function.Consumer; public class FileUtils { pu

我试图将一个文本文件(比方说,一个日志文件)分块,以便一次只选择一定数量的行进行处理(比方说,我们要把日志文件拆分为若干较小的文件)。我以命令式风格编写了这段代码:

package utils;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.function.Consumer;

/**
 * Helper for processing a text file in groups ("chunks") of lines.
 */
public class FileUtils {

    public static void main(String[] args) {
        readFileInChunks("D:\\demo.txt", 10000, System.out::println);
    }

    /**
     * Reads the file line by line and hands the accumulated text to
     * {@code processor} every time the total number of lines read reaches a
     * multiple of {@code chunkSize}.
     *
     * <p>Each line is terminated with {@code "\n"} in the chunk. Every chunk
     * after the first one starts with a copy of the file's first line, so the
     * first chunk holds {@code chunkSize} lines and later chunks hold the first
     * line plus up to {@code chunkSize} further lines. A final, shorter chunk is
     * delivered only when it contains at least one line that has not been
     * delivered yet; an empty file therefore produces no chunk at all.
     *
     * <p>An {@link IOException} is not propagated: its stack trace is printed
     * and the method returns.
     *
     * @param filePath  path of the file to read
     * @param chunkSize number of lines read between two deliveries
     * @param processor receives each chunk
     */
    public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) {
        // try-with-resources closes the reader; no explicit close() is needed.
        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            StringBuilder lines = new StringBuilder();
            String firstLine = null;
            // True while 'lines' holds at least one line that was not handed out yet.
            // Comparing lines.toString() with "" via != compares references and is
            // always true, which produced empty or first-line-only trailing chunks.
            boolean hasUndeliveredLines = false;

            String line;
            for (int i = 0; (line = br.readLine()) != null; i++) {
                if (firstLine == null) {
                    firstLine = line;
                }

                lines.append(line).append('\n');
                hasUndeliveredLines = true;

                if ((i + 1) % chunkSize == 0) {
                    processor.accept(lines);
                    // Start the next chunk with the repeated first line.
                    lines = new StringBuilder(firstLine).append('\n');
                    hasUndeliveredLines = false;
                }
            }

            if (hasUndeliveredLines) {
                processor.accept(lines);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
package-utils;
导入java.io.BufferedReader;
导入java.io.FileReader;
导入java.io.IOException;
导入java.util.function.Consumer;
公共类文件{
公共静态void main(字符串[]args){
readFileInChunks(“D:\\demo.txt”,10000,System.out::println);
}
公共静态void readFileInChunks(字符串文件路径、int chunkSize、使用者处理器){
try(BufferedReader br=new BufferedReader(new FileReader(filePath))){
StringBuilder线条=新建StringBuilder();
字符串行,第一行=null;
int i;
对于(i=0;(line=br.readLine())!=null;i++){
if(firstLine==null)
第一行=第二行;
行。追加(行+“\n”);
如果((i+1)%chunkSize==0){
处理器。接受(行);
lines=新的StringBuilder(第一行+“\n”);
}
}
if(lines.toString()!=“”){
处理器。接受(行);
}
br.close();
}捕获(IOE异常){
e、 printStackTrace();
}
}
}
这些年来,我一直以迭代(命令式)的方式编写代码,因此想不出如何用基于 Java 8 流的函数式风格来实现这个方法。

是否可以让 readFileInChunks 方法返回一个由各个块组成的 Stream<String>?或者,能否以函数式的方式实现 readFileInChunks?

您可以做的一件事是使用自定义收集器构建这些块,然后将它们发送给消费者,例如(未编译,只是一个示例):

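private static final class ToChunksCollector<T> implements Collector<T, List<StringBuilder>, List<StringBuilder>> {

    private final int chunkSize;

    public ToChunksCollector(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    @Override
    public Supplier<List<StringBuilder>> supplier() {
        return ArrayList::new;
    }

    @Override
    public BiConsumer<List<StringBuilder>, T> accumulator() {
        return (list, line) -> {
            if (list.size() == 0) {
                list.add(new StringBuilder());
            }

            StringBuilder lastBuilder = list.get(list.size() - 1);
            String[] linesInCurrentBuilder = lastBuilder.toString().split("\n");
            // no more room
            if (linesInCurrentBuilder.length == chunkSize) {
                String lastLine = linesInCurrentBuilder[chunkSize - 1];
                StringBuilder builder = new StringBuilder();
                builder.append(lastLine).append("\n");
                list.add(builder);
            } else {
                lastBuilder.append(line).append("\n");
            }
        };
    }

    @Override
    public BinaryOperator<List<StringBuilder>> combiner() {
        return (list1, list2) -> {
            list1.addAll(list2);
            return list1;
        };
    }

    @Override
    public Function<List<StringBuilder>, List<StringBuilder>> finisher() {
        return Function.identity();
    }

    // TODO add the relevant characteristics
    @Override
    public Set<Characteristics> characteristics() {
        return EnumSet.noneOf(Characteristics.class);
    }
}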
然后是用法:

/**
 * Collects all lines of the file into chunks with a {@code ToChunksCollector}
 * and then passes the resulting builders to {@code processor} in list order.
 *
 * <p>The whole file is collected before the first chunk is delivered. An
 * {@link IOException} is not propagated; its stack trace is printed instead.
 *
 * @param filePath  path of the file to read
 * @param chunkSize value handed to the {@code ToChunksCollector} constructor
 *                  (presumably the number of lines per chunk)
 * @param processor receives every collected chunk
 */
public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) {
    try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
        List<StringBuilder> chunks = reader.lines().collect(new ToChunksCollector<>(chunkSize));
        // Deliver the chunks one after another, in encounter order.
        chunks.forEach(processor);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
public static void readFileInChunks(字符串文件路径、int chunkSize、使用者处理器){
try(BufferedReader br=new BufferedReader(new FileReader(filePath))){
列表生成器=br.lines().collect(新的tochunkscoller(chunkSize));
builder.stream().forEachOrdered(处理器);
}捕获(IOE异常){
e、 printStackTrace();
}
}
您可以做的一件事是让一个自定义收集器构建这些块,然后将它们发送给消费者,例如(不是编译的,只是一个示例):

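private static final class ToChunksCollector<T> implements Collector<T, List<StringBuilder>, List<StringBuilder>> {

    private final int chunkSize;

    public ToChunksCollector(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    @Override
    public Supplier<List<StringBuilder>> supplier() {
        return ArrayList::new;
    }

    @Override
    public BiConsumer<List<StringBuilder>, T> accumulator() {
        return (list, line) -> {
            if (list.size() == 0) {
                list.add(new StringBuilder());
            }

            StringBuilder lastBuilder = list.get(list.size() - 1);
            String[] linesInCurrentBuilder = lastBuilder.toString().split("\n");
            // no more room
            if (linesInCurrentBuilder.length == chunkSize) {
                String lastLine = linesInCurrentBuilder[chunkSize - 1];
                StringBuilder builder = new StringBuilder();
                builder.append(lastLine).append("\n");
                list.add(builder);
            } else {
                lastBuilder.append(line).append("\n");
            }
        };
    }

    @Override
    public BinaryOperator<List<StringBuilder>> combiner() {
        return (list1, list2) -> {
            list1.addAll(list2);
            return list1;
        };
    }

    @Override
    public Function<List<StringBuilder>, List<StringBuilder>> finisher() {
        return Function.identity();
    }

    // TODO add the relevant characteristics
    @Override
    public Set<Characteristics> characteristics() {
        return EnumSet.noneOf(Characteristics.class);
    }
}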
然后是用法:

/**
 * Gathers the file's lines into chunks through a {@code ToChunksCollector}
 * and feeds each resulting builder to {@code processor}, first to last.
 *
 * <p>Nothing is delivered until the complete file has been collected. If an
 * {@link IOException} occurs its stack trace is printed and the method returns.
 *
 * @param filePath  path of the file to read
 * @param chunkSize argument for the {@code ToChunksCollector} constructor
 *                  (presumably the number of lines per chunk)
 * @param processor consumer invoked once per collected chunk
 */
public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) {
    try (BufferedReader source = new BufferedReader(new FileReader(filePath))) {
        List<StringBuilder> collected = source.lines().collect(new ToChunksCollector<>(chunkSize));
        // Hand the chunks over in the order they were collected.
        collected.forEach(processor);
    } catch (IOException failure) {
        failure.printStackTrace();
    }
}
public static void readFileInChunks(字符串文件路径、int chunkSize、使用者处理器){
try(BufferedReader br=new BufferedReader(new FileReader(filePath))){
列表生成器=br.lines().collect(新的tochunkscoller(chunkSize));
builder.stream().forEachOrdered(处理器);
}捕获(IOE异常){
e、 printStackTrace();
}
}

您可以定义自定义迭代器并基于它构造流:

/**
 * Returns a lazy, sequential stream whose elements are consecutive chunks of
 * the file, each holding up to {@code chunkSize} lines terminated by
 * {@code "\n"}.
 *
 * <p>The file is read on demand while the stream is consumed. The underlying
 * reader is closed only when the returned stream is closed, so callers should
 * use the stream in a try-with-resources statement. Read failures during
 * traversal surface as {@link UncheckedIOException}.
 *
 * @param filePath  path of the file to read
 * @param chunkSize maximum number of lines per chunk
 * @return stream of chunk strings in file order
 * @throws IOException if the file cannot be opened
 */
public static Stream<String> readFileInChunks(String filePath, int chunkSize) throws IOException {
    BufferedReader br = new BufferedReader(new FileReader(filePath));

    Iterator<String> iter = new Iterator<String>() {
        // Chunk read ahead by hasNext() and not yet returned by next().
        String nextChunk = null;

        @Override
        public boolean hasNext() {
            // Keep hasNext() idempotent: a chunk that is already buffered must
            // not be overwritten (and thereby lost) by reading another one.
            if (nextChunk != null) {
                return true;
            }
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < chunkSize; i++) {
                try {
                    String nextLine = br.readLine();
                    if (nextLine == null) break;
                    sb.append(nextLine).append("\n");
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            if (sb.length() == 0) {
                return false;
            }
            nextChunk = sb.toString();
            return true;
        }

        @Override
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            String chunk = nextChunk;
            nextChunk = null;
            return chunk;
        }
    };
    return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
            iter, Spliterator.ORDERED | Spliterator.NONNULL), false)
            .onClose(() -> {
                try {
                    br.close();
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            });
}
publicstaticstreamreadfileinchunks(stringfilepath,intchunksize)抛出IOException{
BufferedReader br=新的BufferedR
/**
 * Reads the whole file, groups its lines by {@code index / chunkSize} and
 * returns a stream with one string per group, the group's lines joined by
 * {@code "\n"}. Groups are emitted in ascending key order because they are
 * copied into a {@link TreeMap}.
 *
 * <p>All lines are collected into memory before this method returns, so the
 * returned stream no longer depends on the file and the file stream can be
 * closed here.
 *
 * @param filePath  path of the file to read
 * @param chunkSize number of lines per group
 * @return stream of chunk strings
 * @throws IOException if the file cannot be opened
 */
public static Stream<String> readFileInChunks(String filePath, int chunkSize) throws IOException {
    // Files.lines keeps the file open until the stream is closed; the grouping
    // below is eager, so closing right after it releases the handle safely.
    try (Stream<String> lines = Files.lines(Paths.get(filePath))) {
        // NOTE(review): StreamUtils.zipWithIndex is not a JDK API; presumably it
        // pairs each line with its zero-based position - confirm against the library.
        return new TreeMap<>(StreamUtils.zipWithIndex(lines)
                .collect(Collectors.groupingBy(el -> el.getIndex() / chunkSize)))
                .values().stream()
                .map(list -> list.stream()
                        .map(el -> el.getValue())
                        .collect(Collectors.joining("\n")));
    }
}
  package com.grs.stackOverFlow.pack01;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;

    /**
     * Splits a text file with a header line into labelled chunks.
     */
    public class FileUtils {

        public static void main(String[] args) throws IOException {
            readFileInChunks("src/com/grs/stackOverFlow/pack01/demo.txt", 3, System.out::println);
        }

        /**
         * Reads all lines of the file, treats the first line as a header and
         * delivers the remaining lines in groups of up to {@code chunkSize}.
         *
         * <p>Each delivered builder contains {@code "chunk no. = N"}, the header
         * line and the group's lines, separated by {@code "\n"}. The last group
         * may be shorter than {@code chunkSize}. A file consisting of the header
         * only yields a single chunk with an empty body; an empty file yields no
         * chunk. The method keeps no state between calls.
         *
         * @param filePath  path of the file to read
         * @param chunkSize maximum number of non-header lines per chunk; must be positive
         * @param processor receives each chunk
         * @throws IOException              if the file cannot be read
         * @throws IllegalArgumentException if {@code chunkSize} is not positive
         */
        public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) throws IOException {
            if (chunkSize <= 0) {
                throw new IllegalArgumentException("chunkSize must be positive: " + chunkSize);
            }

            List<String> lines = Files.readAllLines(Paths.get(filePath));
            if (lines.isEmpty()) {
                // No header line to repeat, hence nothing to deliver.
                return;
            }
            String firstLine = lines.get(0);
            int total = lines.size();

            // Index 0 is the header, so the data lines start at index 1.
            int start = 1;
            int chunkNo = 1;
            do {
                // long arithmetic keeps start + chunkSize from overflowing.
                int end = (int) Math.min((long) start + chunkSize, total);
                String body = String.join("\n", lines.subList(start, end));
                processor.accept(new StringBuilder("chunk no. = " + chunkNo + "\n" + firstLine + "\n" + body));
                start = end;
                chunkNo++;
            } while (start < total);
        }

    }
/**
 * Reads the file in pieces of at most {@code chunkSize} characters and passes
 * each piece to {@code processor}.
 *
 * <p>Whenever the buffer has been filled completely, the piece is cut after
 * the last {@code '\n'} or {@code '\r'} found at an index greater than zero;
 * the characters behind the cut are carried over to the next piece. If no such
 * character exists the full buffer is delivered. Whatever is buffered when the
 * end of the file is reached is delivered as the final piece without cutting.
 *
 * <p>The same {@link CharBuffer} instance is passed on every call and is
 * overwritten afterwards, so the processor must copy the content (for example
 * via {@code toString()}) if it needs it after returning.
 *
 * <p>An {@link IOException} is not propagated; its stack trace is printed.
 *
 * @param filePath  path of the file to read
 * @param chunkSize buffer capacity in characters; must be positive
 * @param processor receives each piece
 * @throws IllegalArgumentException if {@code chunkSize} is not positive
 */
public static void readFileInChunks(
    String filePath, int chunkSize, Consumer<? super CharSequence> processor) {

    // A zero-capacity buffer never has room, so nothing would ever be read and
    // the loop below would deliver empty buffers forever.
    if(chunkSize<=0)
        throw new IllegalArgumentException("chunkSize must be positive: "+chunkSize);

    CharBuffer buf=CharBuffer.allocate(chunkSize);
    try(FileReader r = new FileReader(filePath)) {
        readMore: for(;;) {
            // Fill the buffer completely; leave the outer loop at end of file.
            while(buf.hasRemaining()) if(r.read(buf)<0) break readMore;
            buf.flip();
            int oldLimit=buf.limit();
            // Search backwards for a line terminator and cut right behind it.
            for(int p=oldLimit-1; p>0; p--)
                if(buf.charAt(p)=='\n' || buf.charAt(p)=='\r') {
                    buf.limit(p+1);
                    break;
                }
            processor.accept(buf);
            // Select the characters behind the cut and move them to the front.
            buf.position(buf.limit()).limit(oldLimit);
            buf.compact();
        }
        // Deliver what was buffered when the end of the file was hit.
        if(buf.position()>0) {
            buf.flip();
            processor.accept(buf);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Returns a lazy, sequential stream of pieces of the file, each at most
 * {@code chunkSize} characters long, decoded with the platform default charset.
 *
 * <p>Whenever the buffer has been filled completely, the piece is cut after
 * the last {@code '\n'} or {@code '\r'} found at an index greater than zero and
 * the rest is carried over to the next piece; the final piece is whatever is
 * buffered when the end of the file is reached.
 *
 * <p>The file is closed as soon as the end of input has been reached, and also
 * when the returned stream is closed. Callers that may stop consuming early
 * should therefore use the stream in a try-with-resources statement. Read
 * failures during traversal surface as {@link UncheckedIOException}.
 *
 * @param filePath  path of the file to read
 * @param chunkSize buffer capacity in characters; must be positive
 * @return stream of pieces in file order
 * @throws IOException              if the file cannot be opened or its size queried
 * @throws IllegalArgumentException if {@code chunkSize} is not positive
 */
public static Stream<String> fileInChunks(
        String filePath, int chunkSize) throws IOException {

    // A zero-capacity buffer would never read anything and the stream would
    // emit empty strings without end.
    if(chunkSize<=0)
        throw new IllegalArgumentException("chunkSize must be positive: "+chunkSize);

    FileChannel ch=FileChannel.open(Paths.get(filePath), StandardOpenOption.READ);
    final long size;
    final Reader r;
    try {
        CharsetDecoder dec = Charset.defaultCharset().newDecoder();
        // Estimated number of characters, used only as the spliterator's size estimate.
        size = (long)(ch.size()*dec.averageCharsPerByte());
        r = Channels.newReader(ch, dec, chunkSize);
    } catch(IOException | RuntimeException ex) {
        // Do not leak the channel when the set-up fails.
        ch.close();
        throw ex;
    }
    // Closes the reader (and with it the channel).
    Runnable closer = () -> {
        try {
            r.close();
        } catch(IOException ex) { throw new UncheckedIOException(ex); }
    };
    return StreamSupport.stream(new Spliterators.AbstractSpliterator<String>(
            (size+chunkSize-1)/chunkSize, Spliterator.ORDERED|Spliterator.NONNULL) {
        // Set to null once the end of the file has been processed.
        CharBuffer buf=CharBuffer.allocate(chunkSize);
        public boolean tryAdvance(Consumer<? super String> processor) {
            CharBuffer buf=this.buf;
            if(buf==null) return false;
            boolean more=true;
            // Fill the buffer completely or until the end of the file.
            while(buf.hasRemaining() && more) try {
                if(r.read(buf)<0) more=false;
            } catch(IOException ex) { throw new UncheckedIOException(ex); }
            if(more) {
                buf.flip();
                int oldLimit=buf.limit();
                // Search backwards for a line terminator and cut right behind it.
                for(int p=oldLimit-1; p>0; p--)
                    if(buf.charAt(p)=='\n' || buf.charAt(p)=='\r') {
                        buf.limit(p+1);
                        break;
                    }
                processor.accept(buf.toString());
                // Carry the characters behind the cut over to the next piece.
                buf.position(buf.limit()).limit(oldLimit);
                buf.compact();
                return true;
            }
            // End of file: release the file before handing out the buffered rest.
            this.buf=null;
            closer.run();
            if(buf.position()>0) {
                buf.flip();
                processor.accept(buf.toString());
                return true;
            }
            return false;
        }
    }, false).onClose(closer);
}