Apache Flink:Flink 何时使来自 QueryableState 的时间窗口结果过期?
我已经用滚动窗口(tumbling window)和 QueryableState 实现了 Total WordCount 示例。我使用了 10 秒的时间窗口,打印结果时显示的结果是正确的;但当我使用可查询状态并通过 QueryableStateClient 进行查询时,即使时间窗口已经改变,它仍然返回缓存的上一个时间窗口的结果。例如,在时间窗口 11:00:01 到 11:00:10 内,“Nirav”的字数为 5;当我在 11:00:50 查询“Nirav”时,它仍然返回之前的计数 5。所以我有两个问题:Apache Flink:Flink 何时使来自 QueryableState 的时间窗口结果过期?,apache-flink,Apache Flink,我已经用滚动窗口(tumbling window)和 QueryableState 实现了 Total WordCount 示例。我使用了 10 秒的时间窗口,打印结果时显示的结果是正确的;但当我使用可查询状态并通过 QueryableStateClient 进行查询时,即使时间窗口已经改变,它仍然返回缓存的上一个时间窗口的结果。例如,在时间窗口 11:00:01 到 11:00:10 内,“Nirav”的字数为 5;当我在 11:00:50 查询“Nirav”时,它仍然返回之前的计数 5。所以我有两个问题: 这是Flink的QueryableState
// Excerpt of the job's pipeline: count words per key in fixed 10-second windows
// and publish each window's reduced result as queryable state.
// `text` and `valueStateDescriptor` are defined outside this excerpt.
int sec = 10;
Time seconds = Time.seconds(sec);
// Split every incoming line on single whitespace characters and emit (word, 1) for each token.
text.flatMap(new FlatMapFunction<String, WordWithCount>() {
public void flatMap(String value, Collector<WordWithCount> out) {
for (String word : value.split("\\s")) {
out.collect(new WordWithCount(word, 1L));
}
}
})
// Group by the POJO field "word" and aggregate inside each time window.
.keyBy("word")
.timeWindow(seconds)
.reduce(new ReduceFunction<WordWithCount>() {
public WordWithCount reduce(WordWithCount a, WordWithCount b) {
// Debug trace of every pairwise merge performed inside a window.
System.out.println("After time window fun:- a.word:" + a.word + ", a.count:" + a.count + ", b.word:" + b.word + ", b.count:" + b.count);
return new WordWithCount(a.word, a.count + b.count);
}
})
// Re-key the window results by word and expose them under the name "wordCountQuery".
// NOTE(review): nothing in this excerpt clears the published value when a window closes;
// presumably a key keeps its last emitted result until a later window emits again - confirm.
.keyBy(wordWithCount -> wordWithCount.word)
.asQueryableState("wordCountQuery", valueStateDescriptor)
int-sec=10;
时间秒=时间秒(秒);
text.flatMap(新的flatMap函数(){
公共void平面图(字符串值,收集器输出){
for(字符串字:value.split(\\s))){
out.collect(新单词with count(单词,1L));
}
}
})
.keyBy(“word”)
.时间窗口(秒)
.reduce(新的ReduceFunction(){
公共单词WithCount reduce(单词WithCount a、单词WithCount b){
System.out.println(“时间窗口后的乐趣:-a.word:+a.word+”,a.count:+a.count+,b.word:+b.word+,b.count:+b.count”);
使用count返回新词(a.word,a.count+b.count);
}
})
.keyBy(wordWithCount->wordWithCount.word)
.asQueryableState(“wordCountQuery”,valueStateDescriptor)
整体实施
SocketWindowWordCountWithQueryableStateWithTimeWindow.java
package com.nirav.modi;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
 * Streaming word count that reads lines from a local socket, counts words in
 * 10-second time windows and registers the per-word window result as queryable
 * state under the name {@code wordCountQuery}.
 *
 * <p>Usage: {@code --port <port>}.
 */
public class SocketWindowWordCountWithQueryableStateWithTimeWindow {

    /** Name used both for the state descriptor and for the queryable state registration. */
    private static final String QUERYABLE_STATE_NAME = "wordCountQuery";

    /**
     * Builds and runs the job.
     *
     * @param args command line arguments; {@code --port <port>} selects the socket to read from
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        // Resolve the socket port; print a usage hint and stop when it cannot be read.
        final int port;
        try {
            port = ParameterTool.fromArgs(args).getInt("port");
        } catch (Exception e) {
            System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'");
            return;
        }

        // Execution environment with exactly-once checkpoints every 10000 ms.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(10000, CheckpointingMode.EXACTLY_ONCE);

        // Source: text lines from the socket on localhost.
        DataStream<String> text = env.socketTextStream("localhost", port);

        // Currently not referenced by the pipeline below (a ReducingStateDescriptor
        // built from it would be the alternative to the ValueStateDescriptor).
        ReduceFunction<WordWithCount> reduceFunction = new ReduceFunction<WordWithCount>() {
            public WordWithCount reduce(WordWithCount left, WordWithCount right) {
                System.out.println("reduce fun:- a.word:" + left.word + ", a.count:" + left.count + ", b.word:" + right.word + ", b.count:" + right.count);
                return new WordWithCount(left.word, left.count + right.count);
            }
        };

        ValueStateDescriptor<WordWithCount> valueStateDescriptor =
                new ValueStateDescriptor<WordWithCount>(QUERYABLE_STATE_NAME, WordWithCount.class);

        int sec = 10;
        Time seconds = Time.seconds(sec);

        // Stage 1: one (word, 1) record per whitespace-separated token.
        DataStream<WordWithCount> words = text.flatMap(new WhitespaceTokenizer());

        // Stage 2: per-word sum inside each time window.
        DataStream<WordWithCount> windowedCounts = words
                .keyBy("word")
                .timeWindow(seconds)
                .reduce(new WindowCountSummer());

        // Stage 3: publish the window results, keyed by word, for external queries.
        windowedCounts
                .keyBy(entry -> entry.word)
                .asQueryableState(QUERYABLE_STATE_NAME, valueStateDescriptor);

        env.getConfig().enableSysoutLogging();

        // NOTE(review): this JobGraph is generated separately from whatever env.execute()
        // submits below - confirm the ID printed here is the ID of the running job.
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        System.out.println("[info] Window WordCount with Time Window Job ID: " + jobGraph.getJobID());
        System.out.println();

        env.execute("Socket Window WordCount with Time Window of " + sec + " seconds");
    }

    /** Splits a line on single whitespace characters and emits each token with a count of one. */
    private static final class WhitespaceTokenizer implements FlatMapFunction<String, WordWithCount> {
        public void flatMap(String value, Collector<WordWithCount> out) {
            String[] tokens = value.split("\\s");
            for (int i = 0; i < tokens.length; i++) {
                out.collect(new WordWithCount(tokens[i], 1L));
            }
        }
    }

    /** Adds up the counts of two records of the same word and traces the merge on stdout. */
    private static final class WindowCountSummer implements ReduceFunction<WordWithCount> {
        public WordWithCount reduce(WordWithCount left, WordWithCount right) {
            System.out.println("After time window fun:- a.word:" + left.word + ", a.count:" + left.count + ", b.word:" + right.word + ", b.count:" + right.count);
            long total = left.count + right.count;
            return new WordWithCount(left.word, total);
        }
    }

    /**
     * Data type for words with count. Fields are public and a no-argument
     * constructor is present; the pipeline keys on the field name {@code "word"}.
     */
    public static class WordWithCount {
        public String word;
        public long count;

        public WordWithCount() {
        }

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        @Override
        public String toString() {
            return new StringBuilder().append(word).append(" : ").append(count).toString();
        }
    }
}
import org.apache.flink.api.common.functions.FlatMapFunction;
导入org.apache.flink.api.common.functions.ReduceFunction;
导入org.apache.flink.api.common.state.ReduceingStateDescriptor;
导入org.apache.flink.api.common.state.ValueStateDescriptor;
导入org.apache.flink.api.java.utils.ParameterTool;
导入org.apache.flink.runtime.jobgraph.jobgraph;
导入org.apache.flink.streaming.api.CheckpointingMode;
导入org.apache.flink.streaming.api.datastream.datastream;
导入org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
导入org.apache.flink.streaming.api.windowing.time.time;
导入org.apache.flink.util.Collector;
公共类SocketWindows或CountWithQueryableStateWithTimeWindow{
公共静态void main(字符串[]args)引发异常{
//要连接到的端口
最终国际端口;
试一试{
final ParameterTool params=ParameterTool.fromArgs(args);
端口=params.getInt(“端口”);
}捕获(例外e){
System.err.println(“未指定端口。请运行'SocketWindowWordCount--port'”;
返回;
}
//获取执行环境
最终StreamExecutionEnvironment env=StreamExecutionEnvironment.getExecutionEnvironment();
环境启用检查点(10000,检查点模式。仅启用一次);
//通过连接到套接字获取输入数据
DataStream text=env.socketTextStream(“localhost”,端口);
ReduceFunction ReduceFunction=新的ReduceFunction(){
公共单词WithCount reduce(单词WithCount a、单词WithCount b){
System.out.println(“减少乐趣:-a.word:+a.word+”,a.count:+a.count+”,b.word:+b.word+”,b.count:+b.count);
使用count返回新词(a.word,a.count+b.count);
}
};
//ReductionStateDescriptor描述符=新的ReductionStateDescriptor(“wordCountQuery”,ReductionFunction,WordWithCount.class);
ValueStateDescriptor ValueStateDescriptor=新的ValueStateDescriptor(“wordCountQuery”,WordWithCount.class);
整数秒=10;
时间秒=时间秒(秒);
text.flatMap(新的flatMap函数(){
公共void平面图(字符串值,收集器输出){
for(字符串字:value.split(\\s))){
out.collect(新单词with count(单词,1L));
}
}
})
.keyBy(“word”)
.时间窗口(秒)
.reduce(新的ReduceFunction(){
公共单词WithCount reduce(单词WithCount a、单词WithCount b){
System.out.println(“时间窗口后的乐趣:-a.word:+a.word+”,a.count:+a.count+,b.word:+b.word+,b.count:+b.count”);
使用count返回新词(a.word,a.count+b.count);
}
}).keyBy(wordWithCount->wordWithCount.word)
.asQueryableState(“wordCountQuery”,valueStateDescriptor);
env.getConfig().enableSysoutLogging();
JobGraph JobGraph=env.getStreamGraph().getJobGraph();
System.out.println(“[info]窗口字数,时间窗口作业ID:”+jobGraph.getJobID());
System.out.println();
环境执行(“套接字窗口字计数,时间窗口为“+秒+”秒”);
}
//带计数的字的数据类型
公共静态类WordWithCount{
公共字符串;
公众长时间计数;
PublicWordWithCount(){
}
公用字WithCount(字符串字、长计数){
这个单词=单词;
this.count=计数;
}
@凌驾
公共字符串toString(){
返回单词+“:”+计数;
}
}
}
QueryStateWithWindowTest.java
package com.nirav.modi;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.queryablestate.client.QueryableStateClient;
import scala.tools.jline_embedded.console.ConsoleReader;
import java.io.PrintWriter;
import java.net.UnknownHostException;
import java.util.concurrent.CompletableFuture;
/**
 * Interactive console client that looks up per-word counts in the queryable
 * state published by {@code SocketWindowWordCountWithQueryableStateWithTimeWindow}.
 *
 * <p>Usage: {@code --jobId <jobId> --queryableStateName <name> [--host <host>] [--port <port>]}.
 * Every line typed at the prompt is trimmed and used as the key to query.
 */
public class QueryStateWithWindowTest {

    /** Proxy host used when {@code --host} is not given (the previously hard-coded value). */
    private static final String DEFAULT_PROXY_HOST = "truecomtelesoft";

    /** Proxy port used when {@code --port} is not given (the previously hard-coded value). */
    private static final int DEFAULT_PROXY_PORT = 9069;

    /**
     * Reads keys from the console and prints the count stored for each of them.
     *
     * @param args command line arguments, see the class description
     * @throws Exception if the console cannot be set up or read
     */
    public static void main(String[] args) throws Exception {
        final JobID jobId;
        final String queryableStateName;
        final String proxyHost;
        final int proxyPort;
        try {
            final ParameterTool params = ParameterTool.fromArgs(args);
            // getRequired() throws for an absent key, whereas get() silently returns null
            // and would only fail later, in the middle of the first query.
            // Parsing the job ID once up front also rejects malformed IDs immediately.
            jobId = JobID.fromHexString(params.getRequired("jobId"));
            queryableStateName = params.getRequired("queryableStateName");
            proxyHost = params.get("host", DEFAULT_PROXY_HOST);
            proxyPort = params.getInt("port", DEFAULT_PROXY_PORT);
        } catch (Exception e) {
            System.err.println("Missing or invalid arguments. Please run 'QueryStateWithWindowTest "
                    + "--jobId <jobId> --queryableStateName <name> [--host <host>] [--port <port>]'");
            return;
        }

        try {
            ValueStateDescriptor<SocketWindowWordCountWithQueryableStateWithTimeWindow.WordWithCount> valueStateDescriptor =
                    new ValueStateDescriptor<>("wordCountQuery", SocketWindowWordCountWithQueryableStateWithTimeWindow.WordWithCount.class);

            // NOTE(review): the client is never shut down; consider releasing it with the
            // shutdown method offered by the Flink version in use.
            QueryableStateClient client = new QueryableStateClient(proxyHost, proxyPort);
            ExecutionConfig config = new ExecutionConfig();
            client.setExecutionConfig(config.enableClosureCleaner());

            ConsoleReader reader = new ConsoleReader();
            reader.setPrompt("$ ");
            PrintWriter out = new PrintWriter(reader.getOutput());

            String line;
            while ((line = reader.readLine()) != null) {
                // The job keys on words exactly as they were typed, so the key must not be
                // lower-cased here or mixed-case words could never be found.
                String key = line.trim();
                out.printf("[info] Querying key '%s'\n", key);
                try {
                    long start = System.currentTimeMillis();
                    CompletableFuture<ValueState<SocketWindowWordCountWithQueryableStateWithTimeWindow.WordWithCount>> kvState =
                            client.getKvState(jobId, queryableStateName, key, BasicTypeInfo.STRING_TYPE_INFO, valueStateDescriptor);
                    SocketWindowWordCountWithQueryableStateWithTimeWindow.WordWithCount wordWithCount = kvState.get().value();
                    long duration = Math.max(0, System.currentTimeMillis() - start);
                    if (wordWithCount == null) {
                        // Guard against an empty state instead of failing with a NullPointerException.
                        out.printf("No value stored for key '%s' (query took %d ms)\n", key, duration);
                    } else {
                        out.printf("%d (query took %d ms)\n", wordWithCount.count, duration);
                    }
                } catch (InterruptedException e) {
                    // Preserve the interrupt for the caller and stop prompting.
                    Thread.currentThread().interrupt();
                    out.println("Interrupted while waiting for the query result.");
                    out.flush();
                    break;
                } catch (Exception e) {
                    // All query failures are reported on the console writer.
                    out.println("Query failed because of the following Exception:");
                    e.printStackTrace(out);
                }
                // Push the response out before blocking on the next prompt.
                out.flush();
            }
        } catch (UnknownHostException e) {
            System.err.println("Cannot resolve queryable state proxy host '" + proxyHost + "': " + e.getMessage());
        }
    }
}
package com.nirav.modi;
导入org.apache.flink.api.common.ExecutionConfig;
导入org.apache.flink.api.common.JobID;
导入org.apache.flink.api.common.state.ValueState;
导入org.apache.flink.api.common.state.Valu