Apache Flink:为什么最后一个匹配事件的摄取时间与 CEP 匹配并触发模式的时间之间间隔如此之长?

Apache Flink:为什么最后一个匹配事件的摄取时间与 CEP 匹配并触发模式的时间之间间隔如此之长?标签:apache-flink、flink-cep、Apache Flink、Flink Cep。我写了一个非常简单的 CEP 模式,它只匹配由三个字母 "abc" 组成的序列。然而,即使是如此简单的模式,从最后一个匹配事件的摄取时间到 CEP 匹配该模式并触发的时间,Flink(测试了 1.5 和 1.4.2)也要花费将近 1 秒(有时约 0.5 秒)。以下是测试结果,请注意:最后两个字段 ingestionTimestamp 和 timestamp 分别是最后一个匹配事件的摄取时间和 CEP 触发匹配模式的时间戳。我的问题是如何提高性能?CEP 是否有类似 setBufferTimeout 的设置?我试图将缓冲超时设置为 5 毫秒,但没有成功

我写了一个非常简单的 CEP 模式,它只匹配由三个字母 "abc" 组成的序列。然而,即使是如此简单的模式,从最后一个匹配事件的摄取时间到 CEP 匹配该模式并触发的时间,Flink(测试了 1.5 和 1.4.2)也要花费将近 1 秒(有时约 0.5 秒)。

以下是测试结果,请注意:最后两个字段 ingestionTimestamp 和 timestamp 分别是最后一个匹配事件的摄取时间和 CEP 触发匹配模式的时间戳。

我的问题是:如何提高性能?CEP 是否有类似 setBufferTimeout 的设置?我试图将缓冲超时设置为 5 毫秒,但没有成功。

测试结果:

3> Transport{prodId=411, from='a', to='b', ingestionTimestamp='1528384356501', timestamp='1528384357034'} Transport{prodId=411, from='b', to='c', ingestionTimestamp='1528384356502', timestamp='1528384357034'} Transport{prodId=411, from='c', to='d', ingestionTimestamp='1528384356505', timestamp='1528384357034'} 
3> Transport{prodId=415, from='a', to='b', ingestionTimestamp='1528384356530', timestamp='1528384357034'} Transport{prodId=415, from='b', to='c', ingestionTimestamp='1528384356532', timestamp='1528384357034'} Transport{prodId=415, from='c', to='d', ingestionTimestamp='1528384356534', timestamp='1528384357034'} 
3> Transport{prodId=419, from='a', to='b', ingestionTimestamp='1528384356549', timestamp='1528384357034'} Transport{prodId=419, from='b', to='c', ingestionTimestamp='1528384356549', timestamp='1528384357034'} Transport{prodId=419, from='c', to='d', ingestionTimestamp='1528384356554', timestamp='1528384357034'}    
代码如下:

public class RetailerExampleKafka {

/** Value used for the Kafka consumer's {@code bootstrap.servers} property. */
private static final String LOCAL_KAFKA_BROKER = "localhost:9092";
// private static final String RIDE_SPEED_GROUP = "rideSpeedGroup";
/**
 * The initial source of our shipment; the "start" stage of the CEP pattern
 * accepts only events whose {@code from} equals this value.
 */
private static final String SRC = "a";

/**
 * CEP pattern that matches three strictly consecutive events ({@code next}) within 5 seconds:
 * <ol>
 *   <li>"start": {@code from} equals {@link #SRC},</li>
 *   <li>"middle": {@code from} starts with "b",</li>
 *   <li>"end": {@code from} starts with "c".</li>
 * </ol>
 * All three conditions are null-safe: an event whose {@code from} is {@code null} simply
 * does not match instead of failing the job with a {@link NullPointerException}.
 */
private static final Pattern<Transport, ?> pattern = Pattern.<Transport>begin("start")
        .where(new SimpleCondition<Transport>() {
            private static final long serialVersionUID = 314415972814127035L;

            @Override
            public boolean filter(Transport value) {
                return Objects.equals(value.getFrom(), SRC);
            }
        }).next("middle").where(new SimpleCondition<Transport>() {
            private static final long serialVersionUID = 6664468385615273240L;

            @Override
            public boolean filter(Transport value) {
                String from = value.getFrom();
                return from != null && from.startsWith("b");
            }
        }).next("end").where(new SimpleCondition<Transport>() {
            private static final long serialVersionUID = 5721311694340771858L;

            @Override
            public boolean filter(Transport value) {
                String from = value.getFrom();
                return from != null && from.startsWith("c");
            }
        }).within(Time.milliseconds(5000));


/**
 * Builds and runs the job in a local Flink environment: reads {@link Transport} records from
 * Kafka, stamps each with its ingestion timestamp, applies {@link #pattern} and prints every
 * match together with the wall-clock time at which the match was emitted.
 *
 * <p>Arguments (parsed with {@link ParameterTool#fromArgs}):
 * <ul>
 *   <li>{@code --input-topic} (required): Kafka topic to consume,</li>
 *   <li>{@code --group-id} (required): Kafka consumer group id,</li>
 *   <li>{@code --watermark-interval} (optional, milliseconds, default 1000): interval at which
 *       watermarks are generated automatically.</li>
 * </ul>
 *
 * @param args command-line arguments
 * @throws Exception if a required argument is missing or the job fails
 */
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String inputTopic = params.getRequired("input-topic");
    String groupID = params.getRequired("group-id");
    long watermarkIntervalMs = params.getLong("watermark-interval", 1000L);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setBufferTimeout(5);
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.getConfig().disableSysoutLogging();
    // Under event/ingestion time the CEP operator buffers incoming events and only evaluates
    // them once a watermark has passed their timestamp, so this interval is an upper bound on
    // the delay between ingesting the last event of a match and the match being emitted.
    // Lower it (or switch to processing time) to reduce that latency.
    // Keep this call after setStreamTimeCharacteristic, which sets its own interval
    // (NOTE(review): 200 ms in the Flink versions checked -- confirm for the version in use).
    env.getConfig().setAutoWatermarkInterval(watermarkIntervalMs);

    Properties kafkaProps = new Properties();
    kafkaProps.setProperty("bootstrap.servers", LOCAL_KAFKA_BROKER);
    kafkaProps.setProperty("group.id", groupID);
    kafkaProps.setProperty("auto.offset.reset", "earliest");

    // create a Kafka consumer
    FlinkKafkaConsumer011<Transport> consumer = new FlinkKafkaConsumer011<>(
            inputTopic,
            new TransportSchema(),
            kafkaProps);

    // NOTE(review): the result of process() is not a KeyedStream any more, so the pattern below
    // appears to be evaluated over the whole stream rather than per product id -- confirm
    // whether the stream should be keyed again before CEP.pattern.
    DataStream<Transport> rides = env.addSource(consumer)
            .keyBy(element -> element.getProductId())
            .process(new MatchFunction2());

    CEP.pattern(rides, pattern).flatSelect(new PatternFlatSelectFunction<Transport, String>() {
        private static final long serialVersionUID = -8972838879934875538L;

        @Override
        public void flatSelect(Map<String, List<Transport>> map, Collector<String> collector) throws Exception {
            StringBuilder str = new StringBuilder();
            for (List<Transport> stageEvents : map.values()) {
                for (Transport t : stageEvents) {
                    // Overwrite the record timestamp with the time the match is emitted so it
                    // can be compared against ingestionTimestamp in the printed output.
                    t.timestamp = System.currentTimeMillis();
                    str.append(t).append(' ');
                }
            }
            collector.collect(str.toString());
        }
    }).print();
    env.execute();
}

/**
 * Our input records. Each contains:
 * 1. the id of the product,
 * 2. the starting location of the shipment,
 * 3. the final location of the shipment, and
 * 4. a timestamp, plus a separately assigned ingestion timestamp.
 */
public static class Transport {
    private final int prodId;
    private final String from;
    private final String to;
    /** Mutable on purpose: the enclosing class overwrites it when a match is emitted. */
    private long timestamp;
    /** Assigned after construction by the processing function; 0 until then. */
    public long ingestionTimestamp;

    /**
     * @param productId id of the product
     * @param from      starting location of the shipment
     * @param to        final location of the shipment
     * @param timestamp initial timestamp of the record
     */
    public Transport(int productId, String from, String to, long timestamp) {
        this.prodId = productId;
        this.from = from;
        this.to = to;
        this.timestamp = timestamp;
    }

    /** @return the id of the product */
    public int getProductId() {
        return prodId;
    }

    /** @return the starting location of the shipment */
    public String getFrom() {
        return from;
    }

    /** @return the final location of the shipment */
    public String getTo() {
        return to;
    }

    /** @return the current value of the record timestamp */
    public long getTimestamp() {
        return timestamp;
    }

    @Override
    public String toString() {
        return "Transport{" +
                "prodId=" + prodId +
                ", from='" + from + '\'' +
                ", to='" + to + '\'' +
                ", ingestionTimestamp='" + ingestionTimestamp + '\'' +
                ", timestamp='" + timestamp + '\'' +
                '}';
    }

    /**
     * Parses a record from a comma-separated line of the form
     * {@code prodId,from,to,timestamp}. Whitespace around each field is ignored and any
     * fields after the fourth are ignored.
     *
     * @param line the line to parse
     * @return the parsed record
     * @throws IllegalArgumentException if the line has fewer than four fields, or (as
     *         {@link NumberFormatException}) if {@code prodId} or {@code timestamp} is not numeric
     */
    public static Transport fromString(String line) {
        String[] split = line.split(",");
        if (split.length < 4) {
            throw new IllegalArgumentException(
                    "Expected 4 comma-separated fields (prodId,from,to,timestamp) but got "
                            + split.length + ": '" + line + "'");
        }
        return new Transport(
                Integer.parseInt(split[0].trim()),
                split[1].trim(),
                split[2].trim(),
                Long.parseLong(split[3].trim()));
    }
}

/**
 * Looks up the events already matched for the named pattern stage in {@code ctx} and
 * summarises them via the {@code Iterable} overload.
 *
 * @param ctx         condition context to read the matched events from
 * @param patternName name of the pattern stage
 * @return tuple of (destination of the last event, number of events)
 */
private static Tuple2<String, Integer> getLastDestinationAndStopCountForPattern(IterativeCondition.Context<Transport> ctx, String patternName) {
    Iterable<Transport> matchedSoFar = ctx.getEventsForPattern(patternName);
    return getLastDestinationAndStopCountForPattern(matchedSoFar);
}

/**
 * Walks the given events once and reports where the last one ends and how many there are.
 *
 * @param events events to summarise
 * @return tuple whose first field is the {@code to} of the last event (empty string if there
 *         are no events) and whose second field is the number of events
 */
private static Tuple2<String, Integer> getLastDestinationAndStopCountForPattern(Iterable<Transport> events) {
    String lastDestination = "";
    int stopCount = 0;

    for (Transport stop : events) {
        lastDestination = stop.getTo();
        stopCount++;
    }

    Tuple2<String, Integer> summary = new Tuple2<>(lastDestination, stopCount);
    return summary;
}


/**
 * Pass-through function that records on each {@link Transport} the timestamp Flink has
 * attached to it and then forwards the record unchanged otherwise.
 */
public static class MatchFunction2 extends ProcessFunction<Transport, Transport> {
    private static final long serialVersionUID = 1L;

    /**
     * Copies the record's timestamp into {@code ingestionTimestamp} and emits the record.
     *
     * @param ride    the incoming record; mutated in place
     * @param context gives access to the record timestamp and the timer service
     * @param out     collector the record is forwarded to
     */
    @Override
    public void processElement(Transport ride, Context context, Collector<Transport> out) throws Exception {
        // timestamp() returns a boxed Long that is null when the record carries no timestamp
        // (e.g. under processing time); unboxing it blindly would throw a NullPointerException.
        Long recordTimestamp = context.timestamp();
        if (recordTimestamp != null) {
            ride.ingestionTimestamp = recordTimestamp;
        } else {
            ride.ingestionTimestamp = context.timerService().currentProcessingTime();
        }
        out.collect(ride);
    }
}
public class RetailerExampleKafka{
私有静态最终字符串LOCAL_KAFKA_BROKER=“localhost:9092”;
//专用静态最终字符串行驶速度组=“行驶速度组”;
/**
*我们货物的最初来源。
*/
私有静态最终字符串SRC=“a”;
私有静态最终模式=模式。开始(“开始”)
.where(新的SimpleCondition(){
私有静态最终长serialVersionUID=314415972814127035L;
@凌驾
公共布尔筛选器(传输值)引发异常{
返回Objects.equals(value.getFrom(),SRC);
}
}).next(“中间”).where(新的SimpleCondition(){
私有静态最终长serialVersionUID=666446838561527340L;
@凌驾
公共布尔过滤器(传输值){
返回值.getFrom().startsWith(“b”);
}
}).next(“end”).where(新的SimpleCondition(){
私有静态最终长SerialVersionId=5721316940771858L;
@凌驾
公共布尔过滤器(传输值){
返回值.getFrom().startsWith(“c”);
}
}).在(时间.毫秒(5000))内;
公共静态void main(字符串[]args)引发异常{
//List sampleData=new ArrayList();
//添加(新的传输(1,“a”,“b”,0L));
//添加(新的传输(1,“b”,“c”,1L));
//添加(新的传输(1,“c”,“d”,2L));
//StreamExecutionEnvironment env=StreamExecutionEnvironment.getExecutionEnvironment();
StreamExecutionEnvironment env=StreamExecutionEnvironment.createLocalEnvironment();
环境设置超时(5);
环境设置流时间特征(时间特征、摄取时间);
env.getConfig().disableSysoutLogging();
ParameterTool params=ParameterTool.fromArgs(args);
字符串inputTopic=params.getRequired(“输入主题”);
字符串groupID=params.getRequired(“组id”);
Long slide=Long.parseLong(params.getRequired(“slide”).trim());
final int-popThreshold=1;//热门位置的阈值
env.getConfig().setAutoWatermarkInterval(1000);
属性Kafkapprops=新属性();
//Kafkapprops.setProperty(“zookeeper.connect”,本地\ zookeeper \主机);
setProperty(“bootstrap.servers”,本地卡夫卡代理);
Kafkapprops.setProperty(“group.id”、“g111”);
Kafkapprops.setProperty(“auto.offset.reset”、“最早”);
//创建卡夫卡消费者
FlinkKafkaConsumer011消费者=新的FlinkKafkaConsumer011(
他说,
新的TransportSchema(),
卡夫卡普);
DataStream rides=env.addSource(消费者)
.keyBy(元素->元素.getProductId())
.进程(新的MatchFunction2());
CEP.pattern(骑乘,模式).flatSelect(新模式flatselectfunction(){
私有静态最终长serialVersionUID=-8972838879934875538L;
@凌驾
public void flatSelect(映射映射、收集器)引发异常{
StringBuilder str=新的StringBuilder();
对于(Map.Entry:Map.entrySet()){
对于(传输t:entry.getValue()){
t、 timestamp=System.currentTimeMillis();
str.append(t+“”);
}
}
collector.collect(str.toString());
}
}).print();
execute();
}
/**
*我们的输入记录。每个记录包含:
*1.产品的id,
*2.装运的起始位置,以及
*3.装运的最终地点。
*/
公共静态级交通{
私人最终int prodId;
来自的私有最终字符串;
私有最终字符串到;
私有长时间戳;
公共长摄取时间戳;
公共交通(int productId、字符串from、字符串to、长时间戳){
this.prodId=productId;
this.from=from;
这个;
this.timestamp=时间戳;
}
public int getProductId(){
返回prodId;
}
公共字符串getFrom(){
返乡;
}
公共字符串getTo(){
返回;
}
公共长getTimestamp(){
返回时间戳;
}
@凌驾
公共字符串toString(){
返回“传输{”+
“prodId=“+prodId+
“,from='”+from+'\''+
“,to='”+to+'\''+
“,InjectionTimestamp='”+InjectionTimestamp+'\''+
“,timestamp='”+timestamp+'\''+
'}';
}
公共静态传输从字符串(字符串行){
String[]split=line.split(“,”);
传输传输=新传输(Integer.valueOf(split[0])、split[1]、split[2]、Long.valueOf(split[3]);
返程运输;
}
}
私有静态元组2 getLastDestinationAndStopCountForPattern(IterativeCondition.Context ctx,字符串patternName){
返回getLastDestinationAndStopCountForPattern(ctx.getEventsForPattern(patternName));
}
私有静态元组2 getLastDestinationAndStopCountForPattern(Iterable事件){
Tuple2 locationAndStopCount=新的Tuple2(“,0);
运输部