Apache Flink: flink-streaming CEP "Could not find previous shared buffer entry with key"


I am trying to run CEP on Flink. The test data comes from a local path. At first I set the file size to 1G and it ran fine, but when I set the file size to 10G, the error below occurred.

Here is my code. Thanks for any help.


Which version of Flink are you using?
I am using Flink 1.0.2. It runs for about two minutes and outputs results, then fails.
Can you share the file you used to run this test?
Did you ever solve this problem?

The exception:
Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply$mcV$sp(JobManager.scala:716)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:662)
at org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$7.apply(JobManager.scala:662)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:41)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:401)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.lang.RuntimeException: Failure happened in filter function.
at org.apache.flink.cep.nfa.NFA.computeNextStates(NFA.java:292)
at org.apache.flink.cep.nfa.NFA.process(NFA.java:136)
at org.apache.flink.cep.operator.AbstractCEPPatternOperator.processEvent(AbstractCEPPatternOperator.java:93)
at org.apache.flink.cep.operator.AbstractCEPPatternOperator.processElement(AbstractCEPPatternOperator.java:69)
at org.apache.flink.cep.operator.KeyedCEPPatternOperator.processElement(KeyedCEPPatternOperator.java:147)
at org.apache.flink.streaming.runtime.io.StreamInputProcessor.processInput(StreamInputProcessor.java:168)
at org.apache.flink.streaming.runtime.tasks.OneInputStreamTask.run(OneInputStreamTask.java:65)
at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:225)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:559)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.IllegalStateException: Could not find previous shared buffer entry with key: State(send, Normal, [
StateTransition(TAKE, done, with filter),
StateTransition(IGNORE, send),
]), value: cep.customer.Event_d@bd2b81a4 and timestamp: 1461851418716. This can indicate that the element belonging to the previous relation has been already pruned, even though you expect it to be still there.
at org.apache.flink.cep.nfa.SharedBuffer.put(SharedBuffer.java:104)
at org.apache.flink.cep.nfa.NFA.computeNextStates(NFA.java:269)
... 9 more
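
The last message in the trace points at the cause: with within(...), the CEP operator prunes old shared-buffer entries once the window expires, and here a partial match still referenced an entry that had already been pruned. The job below runs on processing time (the default), so replaying a 10G file as fast as possible can easily outrun a one-second window. One direction worth trying, as a hedged sketch only: assign event-time timestamps parsed from the log lines before the keyBy, so pruning follows the data rather than the wall clock. This assumes a Flink release with assignTimestampsAndWatermarks (1.1+); events stands for the Event_d stream before keying, and parseIso8601 is a hypothetical helper for the leading ISO-8601 timestamp.

// Sketch under the assumptions above, not a verified fix.
// Needs org.apache.flink.streaming.api.TimeCharacteristic,
// org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
// and org.apache.flink.streaming.api.watermark.Watermark.
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

DataStream<Event_d> timestamped = events.assignTimestampsAndWatermarks(
        new AssignerWithPeriodicWatermarks<Event_d>() {
            private long maxTs = Long.MIN_VALUE;

            @Override
            public long extractTimestamp(Event_d element, long previousElementTimestamp) {
                long ts = parseIso8601(element.getPrice()); // hypothetical parsing helper
                maxTs = Math.max(maxTs, ts);
                return ts;
            }

            @Override
            public Watermark getCurrentWatermark() {
                // allow one second of out-of-orderness before the watermark advances
                return new Watermark(maxTs - 1000);
            }
        });

The full job from the question: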
public class ReadFromFile {


public static void main(String[] args) throws Exception {


    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
    env.setParallelism(6);

    File dir = new File(System.getProperty("user.dir") + "/cep");
    ///data/tools/devlop/idea/flink-test/cep

    System.out.println(dir.getPath());
    if (!dir.exists()) {
        dir.mkdir();
    }

    //read data from local file
    //it looks like below
    //016-04-20T00:04:35.155Z","10.170.236.226","<p2-sidekiq> 2016-04-20T00:04:31.415Z 4982 TID-oxvsomclk AlterationWorker JID-34683abcb587e008153ce458 INFO: start"

    final DataStream<String> messageStream = env.readTextFile("file://" + dir);
    // filter data
    DataStream<String> da = messageStream.filter(new FilterFunction<String>() {
        @Override
        public boolean filter(String value) throws Exception {
            return value != null && value.contains(" JID-") && value.contains("INFO: ");
        }
    });

    // format data
    DataStream<Tuple4<String, String, String, String>> t3 = da.map(new MapFunction<String, Tuple4<String, String, String, String>>() {
        @Override
        public Tuple4<String, String, String, String> map(String value) throws Exception {

            String[] info = value.split("INFO: ");
            if (info.length == 2) {
                String[] jid = info[0].split(" JID-");
                if (jid.length == 2) {
                    return new Tuple4<String, String, String, String>(jid[0], jid[1].trim(), info[1], "");
                }
            }
            return null;
        }
    });
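    // NOTE: map(...) above returns null for lines that slip past the filter;
    // Flink generally rejects null records at runtime, so a flatMap that just
    // drops unparsable lines is safer (see the sketch after this listing).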


    //make tuple to event
    DataStream<Event_d> input = t3.map(new MapFunction<Tuple4<String, String, String, String>, Event_d>() {
        @Override
        public Event_d map(Tuple4<String, String, String, String> value) throws Exception {
            return new Event_d(value.f0, value.f1, value.f2, value.f3);
        }
    }).keyBy(new KeySelector<Event_d, String>() {

        @Override
        public String getKey(Event_d value) throws Exception {
            return value.getName();
        }
    });

    // design pattern contains (start --> SendThirdPartWorker --> done)
    Pattern<Event_d, ?> pattern = Pattern.<Event_d>begin("start").where(
            new FilterFunction<Event_d>() {
                @Override
                public boolean filter(Event_d value) throws Exception {
                    return value.getPrice().contains("start");//&& MD5Util.MD5(value.getMd5())==;
                }
            }).next("send").where(new FilterFunction<Event_d>() {
                 @Override
                     public boolean filter(Event_d value) throws Exception {
                     return value.getPrice().contains("SendThirdPartWorker");//&& jidMap.get(value.getName())==value.getName();
                }
    }).followedBy("done").where(new FilterFunction<Event_d>() {
        @Override
        public boolean filter(Event_d value) throws Exception {
            return value.getPrice().contains("done") ;//&& a;
        }
    }).within(milliseconds(1000));
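    // NOTE: on Flink 1.0.x this one-second within() window prunes shared-buffer
    // entries by processing time, which is the likely source of the
    // IllegalStateException quoted above when replaying a large backlog.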
    final long mi1 = new Date().getTime();


    DataStream<String> result = CEP.pattern(input, pattern).select(
            new PatternSelectFunction<Event_d, String>() {
        @Override
        public String select(Map<String, Event_d> pattern) {
            StringBuilder builder = new StringBuilder();

            builder.append(dataComt(new Date().getTime(), mi1) + " " + pattern.get("start").getName())
                    .append(" -- ").append(pattern.get("send").getPrice());
                    //.append("--").append(pattern.get("done").getPrice());

            return builder.toString();
        }
    });

    result.writeAsText(dir + "/result", FileSystem.WriteMode.OVERWRITE);

    env.execute("Read from Kafka custom");
}

public static String dataComt(long current, long last) {
    long c = (current - last) / 1000;
    return "\"read " + c + "s \"";
}
}
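
As flagged in the listing, the parsing map can emit null for malformed lines. A minimal sketch of a flatMap that drops such lines instead, keeping the same Tuple4 layout; only the parsing logic is from the original post, and it additionally needs org.apache.flink.api.common.functions.FlatMapFunction and org.apache.flink.util.Collector:

DataStream<Tuple4<String, String, String, String>> t3 = da.flatMap(
        new FlatMapFunction<String, Tuple4<String, String, String, String>>() {
            @Override
            public void flatMap(String value, Collector<Tuple4<String, String, String, String>> out) {
                String[] info = value.split("INFO: ");
                if (info.length == 2) {
                    String[] jid = info[0].split(" JID-");
                    if (jid.length == 2) {
                        // emit only well-formed lines; everything else is silently dropped
                        out.collect(new Tuple4<String, String, String, String>(jid[0], jid[1].trim(), info[1], ""));
                    }
                }
            }
        });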