Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/398.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Java通过谓词将流拆分为流的流_Java_Split_Java Stream_Lazy Evaluation_Predicate - Fatal编程技术网

Java通过谓词将流拆分为流的流

Java通过谓词将流拆分为流的流,java,split,java-stream,lazy-evaluation,predicate,Java,Split,Java Stream,Lazy Evaluation,Predicate,我有数百个大型(6GB)gzip日志文件,我正在使用gzip输入流s阅读这些文件,我希望对它们进行解析。假设每个都有以下格式: Start of log entry 1 ...some log details ...some log details ...some log details Start of log entry 2 ...some log details ...some log details ...some log details

我有数百个大型(6GB)gzip日志文件，我正在使用 GZIPInputStream 读取这些文件，并希望对它们进行解析。假设每个文件都有以下格式:

Start of log entry 1
    ...some log details
    ...some log details
    ...some log details
Start of log entry 2
    ...some log details
    ...some log details
    ...some log details
Start of log entry 3
    ...some log details
    ...some log details
    ...some log details
我正在通过
BufferedReader.lines()
逐行传输gzip文件内容。该流看起来像:

[
    "Start of log entry 1",
    "    ...some log details",
    "    ...some log details",
    "    ...some log details",
    "Start of log entry 2",
    "    ...some log details",
    "    ...some log details",
    "    ...some log details",
    "Start of log entry 3",
    "    ...some log details",
    "    ...some log details",
    "    ...some log details",
]
每个日志项的开始可以通过谓词来标识: line -> line.startsWith("Start of log entry") 。我想根据这个谓词将这个 Stream<String> 转换成 Stream<Stream<String>> 。每个“子流”应该在谓词为真时开始，并在谓词为假时持续收集行，直到谓词下一次为真为止——这表示当前子流的结束和下一个子流的开始。结果如下:

[
    [
        "Start of log entry 1",
        "    ...some log details",
        "    ...some log details",
        "    ...some log details",
    ],
    [
        "Start of log entry 2",
        "    ...some log details",
        "    ...some log details",
        "    ...some log details",
    ],
    [
        "Start of log entry 3",
        "    ...some log details",
        "    ...some log details",
        "    ...some log details",
    ],
]
从那里，我可以获取每个子流，并通过 new LogEntry(Stream<String> logLines) 将其映射，以便将相关日志行聚合到 LogEntry 对象中。

下面是一个大致的想法,看看会是什么样子:

import java.io.*;
import java.nio.charset.*;
import java.util.*;
import java.util.function.*;
import java.util.stream.*;

import static java.lang.System.out;

/**
 * Demo harness from the question: reads a mocked log stream line by line and
 * shows where a "split the flat stream of lines into one sub-stream per log
 * entry" operation is wanted.
 *
 * <p>NOTE: {@code splitByPredicate} is a placeholder for the operation being
 * asked about; {@link java.util.stream.Stream} has no such method, so this
 * class illustrates the desired call shape rather than compiling as written.
 * {@code LogEntry} is likewise not defined in this snippet.
 */
class Untitled {
    /** Sample log text: three entries, each a header line followed by three detail lines. */
    static final String input = 
        "Start of log entry 1\n" +
        "    ...some log details\n" +
        "    ...some log details\n" +
        "    ...some log details\n" +
        "Start of log entry 2\n" +
        "    ...some log details\n" +
        "    ...some log details\n" +
        "    ...some log details\n" +
        "Start of log entry 3\n" +
        "    ...some log details\n" +
        "    ...some log details\n" +
        "    ...some log details";

    /** True for a line that opens a new log entry. */
    static final Predicate<String> isLogEntryStart = line -> line.startsWith("Start of log entry"); 

    /**
     * Streams the sample input line by line and prints one {@code LogEntry} per group of lines.
     *
     * @param args unused
     * @throws Exception if closing any of the readers fails
     */
    public static void main(String[] args) throws Exception {
        try (ByteArrayInputStream gzipInputStream
        = new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)); // mock for fileInputStream based gzipInputStream
             // Decode with the same charset the bytes were encoded with, instead of
             // relying on the platform default charset.
             InputStreamReader inputStreamReader
        = new InputStreamReader( gzipInputStream, StandardCharsets.UTF_8 ); 
             BufferedReader reader = new BufferedReader( inputStreamReader )) {

            reader.lines()
                .splitByPredicate(isLogEntryStart) // <--- What witchcraft should go here?
                .map(LogEntry::new)
                .forEach(out::println);
        }
    }
}
import java.io.*;
导入java.nio.charset.*;
导入java.util.*;
导入java.util.function.*;
导入java.util.stream.*;
导入静态java.lang.System.out;
类无标题{
静态最终字符串输入=
“日志项1的开始\n”+
“…某些日志详细信息\n”+
“…某些日志详细信息\n”+
“…某些日志详细信息\n”+
“日志条目2的开始\n”+
“…某些日志详细信息\n”+
“…某些日志详细信息\n”+
“…某些日志详细信息\n”+
“日志条目3的开始\n”+
“…某些日志详细信息\n”+
“…某些日志详细信息\n”+
“…一些日志详细信息”;
静态最终谓词isLogEntryStart=line->line.startsWith(“日志条目的开始”);
公共静态void main(字符串[]args)引发异常{
try(ByteArrayInputStream gzip输入流)
=new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));//基于gzipInputStream的fileInputStream模拟
InputStreamReader InputStreamReader=新的InputStreamReader(gzipInputStream);
BufferedReader reader=新的BufferedReader(inputStreamReader)){
reader.lines()

.splitByPredicate(isLogEntryStart)//
我认为主要的问题是，您正在逐行读取，并试图用这些行来创建一个 LogEntry 实例，而不是逐块读取（一个块可能跨越多行）。

为此，您可以将 Scanner.findAll(自Java 9起提供)与适当的正则表达式一起使用:

// Sample log text: three entries, each a header line followed by three
// numbered detail lines.
String input =
        "Start of log entry 1\n"        +
        "    ...some log details 1.1\n" +
        "    ...some log details 1.2\n" +
        "    ...some log details 1.3\n" +
        "Start of log entry 2\n"        +
        "    ...some log details 2.1\n" +
        "    ...some log details 2.2\n" +
        "    ...some log details 2.3\n" +
        "Start of log entry 3\n"        +
        "    ...some log details 3.1\n" +
        "    ...some log details 3.2\n" +
        "    ...some log details 3.3";

// Read the input block by block (one regex match per log entry) instead of
// line by line.
try (ByteArrayInputStream gzip = 
         new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
     // Decode with the same charset the bytes were encoded with, instead of
     // relying on the platform default charset.
     InputStreamReader reader = new InputStreamReader(gzip, StandardCharsets.UTF_8);
     Scanner scanner = new Scanner(reader)) {

    String START = "Start of log entry \\d+";
    // One match = an entry header plus everything (DOTALL: including line
    // terminators) up to, but not including, the next header or end of input.
    // No look-behind is needed: the header has just been consumed, so asserting
    // that it precedes the current position would always succeed.
    Pattern pattern = Pattern.compile(
            START + ".*?(?=" + START + "|$)", 
            Pattern.DOTALL);

    scanner.findAll(pattern)
            .map(MatchResult::group)
            // Break each matched block back into its individual lines.
            .map(s -> s.split("\\R"))
            .map(LogEntry::new)
            .forEach(System.out::println);

} catch (IOException e) {
    // Only closing the underlying streams can raise this here; surface it unchecked.
    throw new UncheckedIOException(e);
}
字符串输入=
“日志项1的开始\n”+
“…某些日志详细信息1.1\n”+
“…某些日志详细信息1.2\n”+
“…某些日志详细信息1.3\n”+
“日志条目2的开始\n”+
“…某些日志详细信息2.1\n”+
“…某些日志详细信息2.2\n”+
“…某些日志详细信息2.3\n”+
“日志条目3的开始\n”+
“…某些日志详细信息3.1\n”+
“…某些日志详细信息3.2\n”+
“…一些日志详细信息3.3”;
try(ByteArrayInputStream gzip=
新的ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
InputStreamReader=新的InputStreamReader(gzip);
扫描仪=新扫描仪(读卡器)){
String START=“日志项的开始\\d+”;
Pattern=Pattern.compile(

开始+”(?
Frederico的答案可能是解决这个特定问题的最好方法。沿着他最后提到的自定义 Spliterator 的思路，我补充一个经过改编的答案版本，其中我建议使用自定义迭代器来创建分块的流。这种方法也适用于其他不是由输入读取器创建的流。

/**
 * Iterator that groups the elements of a flat stream into consecutive chunks.
 *
 * <p>A new chunk begins at every element for which the {@code startOfNewEntry}
 * predicate is true; all following elements for which it is false are appended
 * to that chunk. The very first element always opens the first chunk, whether
 * or not it satisfies the predicate, so leading elements that precede the first
 * "start" element form a chunk of their own.
 *
 * <p>Only the chunk currently being built is held in memory. The source is read
 * lazily: nothing is pulled from it until the first chunk is requested.
 *
 * @param <T> element type of the source stream
 */
public class StreamSplitter<T>
    implements Iterator<Stream<T>>
{
    private final Iterator<T>  incoming;
    private final Predicate<T> startOfNewEntry;

    /** First element of the next chunk; meaningful only while {@link #hasNextLine} is true. */
    private T       nextLine;
    /** Explicit "an element is buffered" flag, so a null element is not mistaken for end of input. */
    private boolean hasNextLine;
    /** Whether the first element has been requested from the source yet. */
    private boolean primed;

    /**
     * Splits {@code incoming} into a sequential, ordered stream of chunks.
     *
     * <p>Closing the returned stream also closes {@code incoming}.
     *
     * @param incoming        the flat source stream; it is consumed by the returned stream
     * @param startOfNewEntry returns true for an element that starts a new chunk
     * @param <T>             element type
     * @return a stream with one sub-stream per chunk, in encounter order
     * @throws NullPointerException if either argument is null
     */
    public static <T> Stream<Stream<T>> streamOf(Stream<T> incoming, Predicate<T> startOfNewEntry)
    {
        Objects.requireNonNull(incoming, "incoming");
        Objects.requireNonNull(startOfNewEntry, "startOfNewEntry");

        Spliterator<Stream<T>> chunks = Spliterators.spliteratorUnknownSize(
            new StreamSplitter<>(incoming, startOfNewEntry),
            Spliterator.ORDERED | Spliterator.NONNULL);
        return StreamSupport.stream(chunks, false).onClose(incoming::close);
    }

    private StreamSplitter(Stream<T> stream, Predicate<T> startOfNewEntry)
    {
        this.incoming = stream.iterator();
        this.startOfNewEntry = startOfNewEntry;
    }

    /**
     * @return true if at least one more chunk is available
     */
    @Override
    public boolean hasNext()
    {
        if (!primed)
        {
            // Deferred until first use so that building the pipeline reads nothing.
            primed = true;
            if (incoming.hasNext())
            {
                nextLine = incoming.next();
                hasNextLine = true;
            }
        }
        return hasNextLine;
    }

    /**
     * Collects the next chunk: the buffered first element plus every following
     * element up to (not including) the next element that starts a new chunk.
     *
     * @return the chunk as a stream backed by an in-memory list
     * @throws NoSuchElementException if the source is exhausted
     */
    @Override
    public Stream<T> next()
    {
        if (!hasNext())
            throw new NoSuchElementException("no more entries");

        List<T> nextEntrysLines = new ArrayList<>();
        nextEntrysLines.add(nextLine);

        // The buffered element is consumed now. It is re-armed below only if the
        // source yields another chunk start; otherwise the iteration ends here,
        // even when the last element read satisfied the predicate itself.
        nextLine = null;
        hasNextLine = false;

        while (incoming.hasNext())
        {
            T line = incoming.next();
            if (startOfNewEntry.test(line))
            {
                nextLine = line;
                hasNextLine = true;
                break;
            }
            nextEntrysLines.add(line);
        }

        return nextEntrysLines.stream();
    }
}
公共类流拆分器
实现迭代器
{
私有迭代器传入;
私有谓词startOfNewEntry;
私人T nextLine;
公共静态streamOf(流传入,谓词startOfNewEntry)
{
Iterable Iterable=()->新的StreamSplitter(传入,startOfNewEntry);
返回StreamSupport.stream(iterable.spliterator(),false);
}
专用StreamSplitter(流、谓词startOfNewEntry)
{
this.incoming=stream.iterator();
this.startOfNewEntry=startOfNewEntry;
if(incoming.hasNext())
nextLine=incoming.next();
}
@凌驾
公共布尔hasNext()
{
返回下一行!=null;
}
@凌驾
公共流下一个()
{
List nextEntrysLines=new ArrayList();
做
{
添加(nextLine);
}while(incoming.hasNext()
&&!startOfNewEntry.test((nextLine=incoming.next());
如果(!startOfNewEntry.test(nextLine))//传入没有下一个
nextLine=null;
返回nextEntrysLines.stream();
}
}
示例

/**
 * Usage example: splits nine sample lines into three log entries and prints
 * each entry's lines beneath a separator line.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    Stream<String> sampleLines = Stream.of(
        "Start of log entry 1",
        "    ...some log details",
        "    ...some log details",
        "Start of log entry 2",
        "    ...some log details",
        "    ...some log details",
        "Start of log entry 3",
        "    ...some log details",
        "    ...some log details");

    // A line opens a new entry when the whole line matches the header pattern.
    Stream<Stream<String>> entries =
        StreamSplitter.streamOf(sampleLines, text -> text.matches("Start of log entry.*"));

    entries.forEach(entryLines -> {
        System.out.println("------------------");
        entryLines.forEach(text -> System.out.println(text));
    });
}

// Output
// ------------------
// Start of log entry 1
//     ...some log details
//     ...some log details
// ------------------
// Start of log entry 2
//     ...some log details
//     ...some log details
// ------------------
// Start of log entry 3
//     ...some log details
//     ...some log details
publicstaticvoidmain(字符串[]args)
{
Stream flat=Stream.of(“日志条目1的开始”,
“…一些日志详细信息”,
“…一些日志详细信息”,
“日志条目2的开始”,
“…一些日志详细信息”,
“…一些日志详细信息”,
“日志条目3的开始”,
“…一些日志详细信息”,
“…一些日志详细信息”);
StreamSplitter.streamOf(flat,line->line.matches(“日志条目的开始。*”))
.forEach(日志项->{
System.out.println(“------------------------”;
logEntry.forEach(System.out::println);
});
}
//输出
// ------------------
//日志条目1的开始
//…一些日志详细信息
//…一些日志详细信息
// ------------------
//日志条目的开始