Java 8：使用 Java 8 流对文本文件进行分块（java-8, java-stream）
我试图将一个文本文件（比方说，一个日志文件）分块，以便一次只选取一定数量的行进行处理（也就是把日志文件拆分成较小的块）。我以命令式风格编写了这段代码：
package utils;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.function.Consumer;
public class FileUtils {

    public static void main(String[] args) {
        readFileInChunks("D:\\demo.txt", 10000, System.out::println);
    }

    /**
     * Reads the file at {@code filePath} and hands it to {@code processor} in chunks
     * of {@code chunkSize} lines. The first line of the file (the header) is repeated
     * at the top of every chunk after the first.
     *
     * <p>Fixes over the original: the trailing-chunk check used
     * {@code lines.toString() != ""}, a reference comparison that is always true, so
     * an empty or header-only chunk was emitted when the line count was an exact
     * multiple of {@code chunkSize} (and even for an empty file). The redundant
     * {@code br.close()} inside try-with-resources is also gone.
     *
     * @param filePath  path of the text file to read
     * @param chunkSize number of lines per chunk (counting all lines read so far)
     * @param processor callback invoked once per chunk
     */
    public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) {
        try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
            StringBuilder chunk = new StringBuilder();
            String line;
            String firstLine = null;
            // Lines appended since the last flush; lets us skip a header-only trailing chunk.
            int linesSinceFlush = 0;
            for (int i = 0; (line = br.readLine()) != null; i++) {
                if (firstLine == null) {
                    firstLine = line;
                }
                chunk.append(line).append('\n');
                linesSinceFlush++;
                if ((i + 1) % chunkSize == 0) {
                    processor.accept(chunk);
                    // Start the next chunk with the repeated header line.
                    chunk = new StringBuilder(firstLine).append('\n');
                    linesSinceFlush = 0;
                }
            }
            // Emit the final partial chunk only if it contains lines read since the last flush.
            if (linesSinceFlush > 0) {
                processor.accept(chunk);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
package-utils;
导入java.io.BufferedReader;
导入java.io.FileReader;
导入java.io.IOException;
导入java.util.function.Consumer;
公共类文件{
公共静态void main(字符串[]args){
readFileInChunks(“D:\\demo.txt”,10000,System.out::println);
}
公共静态void readFileInChunks(字符串文件路径、int chunkSize、使用者处理器){
try(BufferedReader br=new BufferedReader(new FileReader(filePath))){
StringBuilder线条=新建StringBuilder();
字符串行,第一行=null;
int i;
对于(i=0;(line=br.readLine())!=null;i++){
if(firstLine==null)
第一行=第二行;
行。追加(行+“\n”);
如果((i+1)%chunkSize==0){
处理器。接受(行);
lines=新的StringBuilder(第一行+“\n”);
}
}
if(lines.toString()!=“”){
处理器。接受(行);
}
br.close();
}捕获(IOE异常){
e、 printStackTrace();
}
}
}
多年来我一直以命令式（迭代）的方式编写代码，却想不出这个方法基于 Java 8 Stream 的函数式实现。
能否让 readFileInChunks 方法返回块的流（Stream）？
或者，以函数式的方式实现 readFileInChunks？
您可以做的一件事是使用自定义收集器（Collector）构建这些块，然后把它们交给消费者，例如（未经编译，仅作示例）：
private static final class ToChunksCollector<T> implements Collector<String, List<StringBuilder>, List<StringBuilder>> {

    private final int chunkSize;

    public ToChunksCollector(int chunkSize) {
        this.chunkSize = chunkSize;
    }

    @Override
    public Supplier<List<StringBuilder>> supplier() {
        return ArrayList::new;
    }

    @Override
    public BiConsumer<List<StringBuilder>, String> accumulator() {
        return (list, line) -> {
            if (list.size() == 0) {
                list.add(new StringBuilder());
            }
            StringBuilder lastBuilder = list.get(list.size() - 1);
            String[] linesInCurrentBuilder = lastBuilder.toString().split("\n");
            // no more room
            if (linesInCurrentBuilder.length == chunkSize) {
                String lastLine = linesInCurrentBuilder[chunkSize - 1];
                StringBuilder builder = new StringBuilder();
                builder.append(lastLine).append("\n");
                list.add(builder);
            } else {
                lastBuilder.append(line).append("\n");
            }
        };
    }

    @Override
    public BinaryOperator<List<StringBuilder>> combiner() {
        return (list1, list2) -> {
            list1.addAll(list2);
            return list1;
        };
    }

    @Override
    public Function<List<StringBuilder>, List<StringBuilder>> finisher() {
        return Function.identity();
    }

    // TODO add relevant characteristics
    @Override
    public Set<Characteristics> characteristics() {
        return EnumSet.noneOf(Characteristics.class);
    }
}
然后是用法:
/**
 * Reads the file line by line, groups the lines into chunks of chunkSize via
 * ToChunksCollector, and feeds every chunk to the processor in file order.
 */
public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) {
    try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
        List<StringBuilder> chunks = reader.lines().collect(new ToChunksCollector<>(chunkSize));
        chunks.forEach(processor);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
public static void readFileInChunks(字符串文件路径、int chunkSize、使用者处理器){
try(BufferedReader br=new BufferedReader(new FileReader(filePath))){
列表生成器=br.lines().collect(新的tochunkscoller(chunkSize));
builder.stream().forEachOrdered(处理器);
}捕获(IOE异常){
e、 printStackTrace();
}
}
您可以做的一件事是让一个自定义收集器构建这些块,然后将它们发送给消费者,例如(不是编译的,只是一个示例):
私有静态最终类ToChunkCollector实现收集器{
私有最终整数块大小;
公共ToChunksCollector(int chunkSize){
this.chunkSize=chunkSize;
}
@凌驾
公共供应商(){
返回ArrayList::new;
}
@凌驾
公共双消费者累加器(){
返回(列表,行)->{
if(list.size()==0){
添加(新的StringBuilder());
}
StringBuilder lastBuilder=list.get(list.size()-1);
字符串[]linesInCurrentBuilder=lastBuilder.toString().split(“\n”);
//没有空间了
if(linesInCurrentBuilder.length==chunkSize){
字符串lastLine=linesInCurrentBuilder[chunkSize-1];
StringBuilder=新的StringBuilder();
builder.append(lastLine.append(“\n”);
列表。添加(生成器);
}否则{
lastBuilder.append(行).append(“\n”);
}
};
}
@凌驾
公共二进制运算符组合器(){
返回(列表1、列表2)->{
列表1.addAll(列表2);
返回列表1;
};
}
@凌驾
公共函数完成器(){
返回函数.identity();
}
//TODO添加相关字符
@凌驾
公共集特征(){
返回EnumSet.noneOf(Characteristics.class);
}
}
然后是用法:
/**
 * Usage of ToChunksCollector: collects the file's lines into chunk builders,
 * then hands each chunk to the given processor, preserving file order.
 */
public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) {
    try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {
        List<StringBuilder> collected = br.lines().collect(new ToChunksCollector<>(chunkSize));
        for (StringBuilder chunk : collected) {
            processor.accept(chunk);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
public static void readFileInChunks(字符串文件路径、int chunkSize、使用者处理器){
try(BufferedReader br=new BufferedReader(new FileReader(filePath))){
列表生成器=br.lines().collect(新的tochunkscoller(chunkSize));
builder.stream().forEachOrdered(处理器);
}捕获(IOE异常){
e、 printStackTrace();
}
}
您可以定义自定义迭代器并基于它构造流:
/**
 * Lazily streams the file in chunks of at most {@code chunkSize} lines; each
 * stream element is the chunk's lines joined with '\n' (trailing newline kept).
 *
 * <p>Fix over the original: {@code hasNext()} was not idempotent — every call
 * read a fresh chunk, so two consecutive {@code hasNext()} calls silently
 * dropped a chunk. A buffered chunk is now kept until {@code next()} consumes it.
 *
 * <p>The caller must close the returned stream (e.g. try-with-resources) so the
 * underlying reader is released via {@code onClose}.
 *
 * @param filePath  path of the text file to read
 * @param chunkSize maximum number of lines per chunk
 * @return an ordered stream of line chunks
 * @throws IOException if the file cannot be opened
 */
public static Stream<String> readFileInChunks(String filePath, int chunkSize) throws IOException {
    BufferedReader br = new BufferedReader(new FileReader(filePath));
    Iterator<String> iter = new Iterator<String>() {
        String nextChunk = null;

        @Override
        public boolean hasNext() {
            // Idempotence: keep an already-buffered chunk instead of reading a new one.
            if (nextChunk != null) {
                return true;
            }
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < chunkSize; i++) {
                try {
                    String nextLine = br.readLine();
                    if (nextLine == null) break;
                    sb.append(nextLine).append("\n");
                } catch (IOException e) {
                    // Iterator can't throw checked exceptions; surface as unchecked.
                    throw new UncheckedIOException(e);
                }
            }
            if (sb.length() == 0) {
                return false;
            }
            nextChunk = sb.toString();
            return true;
        }

        @Override
        public String next() {
            if (nextChunk != null || hasNext()) {
                String chunk = nextChunk;
                nextChunk = null;
                return chunk;
            } else {
                throw new NoSuchElementException();
            }
        }
    };
    return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
            iter, Spliterator.ORDERED | Spliterator.NONNULL), false)
            .onClose(() -> {
                try {
                    br.close();
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            });
}
另一种做法：借助 protonpack 的 StreamUtils.zipWithIndex，按行号分组来实现同样的分块：
/**
 * Splits the file into chunks of {@code chunkSize} lines using protonpack's
 * {@code StreamUtils.zipWithIndex}: each line is paired with its index, the
 * indices are grouped by {@code index / chunkSize}, and each group is joined
 * back with '\n'. The TreeMap restores ascending chunk order.
 *
 * <p>Fix over the original: the {@code Files.lines} stream was never closed,
 * leaking a file handle. Since {@code groupingBy} consumes the stream eagerly
 * right here, try-with-resources is safe.
 *
 * <p>Note: the whole file is materialized in memory by the grouping collector,
 * so this only suits files that fit in the heap.
 *
 * @param filePath  path of the text file to read
 * @param chunkSize number of lines per chunk
 * @return a stream of joined line chunks in file order
 * @throws IOException if the file cannot be read
 */
public static Stream<String> readFileInChunks(String filePath, int chunkSize) throws IOException {
    try (Stream<String> lines = Files.lines(Paths.get(filePath))) {
        return new TreeMap<>(StreamUtils.zipWithIndex(lines)
                .collect(Collectors.groupingBy(el -> el.getIndex() / chunkSize)))
                .values().stream()
                .map(list -> list.stream()
                        .map(el -> el.getValue())
                        .collect(Collectors.joining("\n")));
    }
}
package com.grs.stackOverFlow.pack01;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
public class FileUtils {

    public static void main(String[] args) throws IOException {
        readFileInChunks("src/com/grs/stackOverFlow/pack01/demo.txt", 3, System.out::println);
    }

    /**
     * Splits the file into chunks of {@code chunkSize} data lines; the first line
     * of the file (the header) is repeated at the top of every chunk, and each
     * chunk is prefixed with {@code "chunk no. = <i>"}.
     *
     * <p>Fixes over the original: the mutable {@code static long processed} field
     * made the method single-use (a second call skipped past the end of the file),
     * the integer-division split count silently dropped the trailing partial chunk,
     * and an empty file crashed on {@code lines.get(0)}. All state is now local and
     * a ceiling division keeps the remainder.
     *
     * @param filePath  path of the text file to read
     * @param chunkSize number of data lines (header excluded) per chunk
     * @param processor callback invoked once per chunk
     * @throws IOException if the file cannot be read
     */
    public static void readFileInChunks(String filePath, int chunkSize, Consumer<StringBuilder> processor) throws IOException {
        List<String> lines = Files.readAllLines(Paths.get(filePath));
        if (lines.isEmpty()) {
            return; // nothing to chunk
        }
        String firstLine = lines.get(0);
        int dataLines = lines.size() - 1;
        // Ceiling division so a partial final chunk is emitted; at least one chunk
        // even for a header-only file (matches the original's behavior there).
        int chunkCount = Math.max(1, (dataLines + chunkSize - 1) / chunkSize);
        for (int i = 1; i <= chunkCount; i++) {
            long offset = 1L + (long) (i - 1) * chunkSize; // skip header + previous chunks
            Optional<String> chunk = lines.stream()
                    .skip(offset)
                    .limit(chunkSize)
                    .reduce((a, b) -> a + "\n" + b);
            processor.accept(new StringBuilder(
                    "chunk no. = " + i + "\n" + firstLine + "\n" + chunk.orElse("")));
        }
    }
}
/**
 * Reads the file in chunks of at most chunkSize CHARACTERS (not lines), cutting each
 * chunk at the last line break found inside the buffer so lines are not split across
 * chunks — unless a single line is longer than the whole buffer, in which case it is
 * split at the buffer boundary.
 */
public static void readFileInChunks(
String filePath, int chunkSize, Consumer<? super CharSequence> processor) {
// One reusable buffer; chunkSize bounds the characters per chunk.
CharBuffer buf=CharBuffer.allocate(chunkSize);
try(FileReader r = new FileReader(filePath)) {
readMore: for(;;) {
// Fill the buffer completely; leave the loop at end of file.
while(buf.hasRemaining()) if(r.read(buf)<0) break readMore;
buf.flip();
int oldLimit=buf.limit();
// Scan backwards for the last '\n' or '\r' and shrink the limit so the
// chunk handed out ends right after that line break.
for(int p=oldLimit-1; p>0; p--)
if(buf.charAt(p)=='\n' || buf.charAt(p)=='\r') {
buf.limit(p+1);
break;
}
// NOTE(review): the buffer itself is passed as a CharSequence; the consumer
// must copy (e.g. toString()) rather than keep a reference — it is reused.
processor.accept(buf);
// Restore the full limit, then move the unconsumed tail (the partial line
// after the cut) to the front of the buffer for the next round.
buf.position(buf.limit()).limit(oldLimit);
buf.compact();
}
// Emit whatever remained in the buffer when EOF was hit.
if(buf.position()>0) {
buf.flip();
processor.accept(buf);
}
} catch (IOException e) {
e.printStackTrace();
}
}
/**
 * Returns a lazy Stream of chunks of at most chunkSize characters, each chunk cut at
 * the last line break inside the buffer (like readFileInChunks above) but delivered
 * as immutable Strings through a custom spliterator.
 *
 * NOTE(review): nothing here closes the channel/reader when the stream is closed or
 * exhausted — consider adding an onClose handler; confirm with the original answer.
 */
public static Stream<String> fileInChunks(
String filePath, int chunkSize) throws IOException {
FileChannel ch=FileChannel.open(Paths.get(filePath), StandardOpenOption.READ);
CharsetDecoder dec = Charset.defaultCharset().newDecoder();
// Estimated total char count — used only as the spliterator's size hint.
long size = (long)(ch.size()*dec.averageCharsPerByte());
Reader r = Channels.newReader(ch, dec, chunkSize);
return StreamSupport.stream(new Spliterators.AbstractSpliterator<String>(
(size+chunkSize-1)/chunkSize, Spliterator.ORDERED|Spliterator.NONNULL) {
CharBuffer buf=CharBuffer.allocate(chunkSize);
public boolean tryAdvance(Consumer<? super String> processor) {
CharBuffer buf=this.buf;
// this.buf == null marks that the final partial chunk was already emitted.
if(buf==null) return false;
boolean more=true;
// Fill the buffer; 'more' turns false at end of input.
while(buf.hasRemaining() && more) try {
if(r.read(buf)<0) more=false;
} catch(IOException ex) { throw new UncheckedIOException(ex); }
if(more) {
buf.flip();
int oldLimit=buf.limit();
// Prefer to end the chunk right after the last line break in the buffer.
for(int p=oldLimit-1; p>0; p--)
if(buf.charAt(p)=='\n' || buf.charAt(p)=='\r') {
buf.limit(p+1);
break;
}
processor.accept(buf.toString());
// Carry the unconsumed tail over into the next chunk.
buf.position(buf.limit()).limit(oldLimit);
buf.compact();
return true;
}
// End of input: emit the remaining characters (if any) exactly once.
this.buf=null;
if(buf.position()>0) {
buf.flip();
processor.accept(buf.toString());
return true;
}
return false;
}
}, false);
}