Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/asp.net-mvc-3/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Sockets Flink可以作为数据源接收http请求吗?_Sockets_Http_Datasource_Apache Flink - Fatal编程技术网

Sockets Flink可以作为数据源接收http请求吗?

Sockets Flink可以作为数据源接收http请求吗?,sockets,http,datasource,apache-flink,Sockets,Http,Datasource,Apache Flink,Flink可以读取套接字流,它可以读取http请求吗?怎么做 // socket example DataStream<XXX> socketStream = env .socketTextStream("localhost", 9999) .map(...); //套接字示例 DataStream socketStream=env .socketTextStream(“localhost”,9999) .地图(…); 为Flink创建HTTP接收器

Flink可以读取套接字流,它可以读取http请求吗?怎么做

// socket example
DataStream<XXX> socketStream = env
        .socketTextStream("localhost", 9999)
        .map(...);
//套接字示例
DataStream socketStream=env
.socketTextStream(“localhost”,9999)
.地图(…);
目前有一个尚未关闭的 JIRA 工单，内容是为 Flink 创建 HTTP sink（接收器）连接器，但我没有看到关于创建 source（数据源）连接器的讨论

此外,目前还不清楚这是一个好主意。Flink的容错方法需要可以重绕和重放的源,因此它最适合于行为类似于消息队列的输入源。我建议在分布式日志中缓冲传入的http请求


举个例子，可以看看 DriveTribe 是如何使用 Flink 来支撑他们的网站的。

我编写了一个自定义的 HTTP 数据源（source）。请参考
OneHourHttpTextStreamFunction
。如果您想运行我的代码，您需要创建一个胖 jar（fat jar）来包含 Apache HttpServer 相关的类

package org.apache.flink.streaming.examples.http;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.examples.socket.SocketWindowWordCount.WordWithCount;
import org.apache.flink.util.Collector;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.bootstrap.HttpServer;
import org.apache.http.impl.bootstrap.ServerBootstrap;
import org.apache.http.protocol.HttpContext;
import org.apache.http.protocol.HttpRequestHandler;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * Example streaming job that counts the "words" found in the URIs of incoming HTTP requests.
 *
 * <p>The job starts an embedded HTTP server through {@link OneHourHttpTextStreamFunction},
 * splits every received request URI on whitespace, and prints per-word counts over
 * 5-second windows.
 *
 * <p>Command line arguments:
 * <ul>
 *   <li>{@code --port <port>} (required): port handed to the HTTP source.</li>
 *   <li>{@code --path <pattern>} (optional, {@code *} by default): handler pattern handed to
 *       the HTTP source.</li>
 * </ul>
 */
public class HttpRequestCount {

    /** Artificial per-record delay, in milliseconds, applied in the flatMap and reduce steps. */
    private static final long ARTIFICIAL_DELAY_MILLIS = 1000L;

    /**
     * Parses the arguments, builds the pipeline and runs it.
     *
     * @param args command line arguments, see the class documentation
     * @throws Exception if the job execution fails
     */
    public static void main(String[] args) throws Exception {

        // the handler pattern and the port the embedded HTTP server listens on
        final String path;
        final int port;
        try {
            final ParameterTool params = ParameterTool.fromArgs(args);
            path = params.has("path") ? params.get("path") : "*";
            port = params.getInt("port");
        } catch (Exception e) {
            // The original message was copied from SocketWindowWordCount and described a
            // socket client; this program is an HTTP server, so describe that instead.
            System.err.println("No port specified. Please run 'HttpRequestCount "
                    + "--path <pattern> --port <port>', where path (* by default) "
                    + "is the request pattern to handle and port is the port the HTTP server listens on");
            System.err.println("To produce input, send HTTP requests to the server, e.g. run "
                    + "'curl http://localhost:<port>/<text>'");
            return;
        }

        // get the execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // get input data by serving HTTP requests; each record is one request URI
        DataStream<String> text = env.addSource(new OneHourHttpTextStreamFunction(path, port));

        // parse the data, group it, window it, and aggregate the counts
        DataStream<WordWithCount> windowCounts = text

                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String value, Collector<WordWithCount> out) {
                        simulateSlowProcessing();
                        for (String word : value.split("\\s")) {
                            out.collect(new WordWithCount(word, 1L));
                        }
                    }
                })

                .keyBy("word").timeWindow(Time.seconds(5))

                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                        simulateSlowProcessing();
                        return new WordWithCount(a.word, a.count + b.count);
                    }
                });

        // print the results with a single thread, rather than in parallel
        windowCounts.print().setParallelism(1);

        env.execute("Http Request Count");
    }

    /**
     * Sleeps for {@link #ARTIFICIAL_DELAY_MILLIS} to slow the operator down.
     *
     * <p>NOTE(review): the delay looks like a deliberate demo aid (slow consumer); confirm
     * before removing it. If the thread is interrupted while sleeping, the interrupt flag is
     * restored so the caller's runtime can still observe it, instead of being swallowed.
     */
    private static void simulateSlowProcessing() {
        try {
            Thread.sleep(ARTIFICIAL_DELAY_MILLIS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

}

/**
 * Source function that runs an embedded Apache HttpCore {@link HttpServer} and emits the
 * request URI of every handled HTTP request as one {@code String} record.
 *
 * <p>Every handled request is answered with status 200 and the body {@code OK}. {@link #run}
 * waits on the server for at most one hour (hence the class name) and then stops it.
 *
 * <p>NOTE(review): this source keeps no state and cannot replay requests, so records may be
 * lost on failure/recovery; confirm this is acceptable for the use case.
 */
class OneHourHttpTextStreamFunction implements SourceFunction<String> {

    private static final long serialVersionUID = 1L;

    /** Handler pattern the request handler is registered under. */
    private final String path;

    /** Port the embedded server listens on. */
    private final int port;

    /**
     * Server created by {@link #run}; {@code null} until then. Volatile because it is
     * written by the thread executing {@link #run} and read by the thread calling
     * {@link #cancel()}.
     */
    private transient volatile HttpServer server;

    /** Set by {@link #cancel()} so that a cancel arriving before/while starting is honored. */
    private transient volatile boolean cancelled;

    /**
     * @param path handler pattern to register the request handler under; must not be null
     * @param port listener port, must be in the range 1..65535
     * @throws IllegalArgumentException if {@code port} is out of range
     * @throws NullPointerException if {@code path} is null
     */
    public OneHourHttpTextStreamFunction(String path, int port) {
        checkArgument(port > 0 && port < 65536, "port is out of range");

        this.path = checkNotNull(path, "path must not be null");
        this.port = port;
    }

    /**
     * Starts the HTTP server and blocks until it terminates, one hour has elapsed, or the
     * source is cancelled. The server is always stopped before this method returns.
     *
     * @param ctx context used to emit the request URIs
     * @throws Exception if the server cannot be started or the wait is interrupted
     */
    @Override
    public void run(final SourceContext<String> ctx) throws Exception {
        final HttpServer httpServer = ServerBootstrap.bootstrap()
                .setListenerPort(port)
                .registerHandler(path, new HttpRequestHandler() {

                    @Override
                    public void handle(HttpRequest req, HttpResponse rep, HttpContext context)
                            throws HttpException, IOException {
                        // Requests are handled on the server's worker threads, not on the
                        // source thread, so emission must hold the checkpoint lock as the
                        // SourceFunction contract requires.
                        synchronized (ctx.getCheckpointLock()) {
                            ctx.collect(req.getRequestLine().getUri());
                        }
                        rep.setStatusCode(200);
                        rep.setEntity(new StringEntity("OK"));
                    }
                })
                .create();
        server = httpServer;
        try {
            if (!cancelled) {
                httpServer.start();
                // Re-check: cancel() may have run before the server was active, in which
                // case its stop() request could not take effect.
                if (!cancelled) {
                    httpServer.awaitTermination(1, TimeUnit.HOURS);
                }
            }
        } finally {
            // Release the port and threads on every exit path (timeout, cancel, exception).
            httpServer.stop();
        }
    }

    /**
     * Requests the source to stop. Safe to call before {@link #run} has created the server.
     */
    @Override
    public void cancel() {
        cancelled = true;
        final HttpServer httpServer = server;
        if (httpServer != null) {
            httpServer.stop();
        }
    }
}
package org.apache.flink.streaming.examples.http;
导入org.apache.flink.api.common.functions.FlatMapFunction;
导入org.apache.flink.api.common.functions.ReduceFunction;
导入org.apache.flink.api.java.utils.ParameterTool;
导入org.apache.flink.streaming.api.datastream.datastream;
导入org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
导入org.apache.flink.streaming.api.functions.source.SourceFunction;
导入org.apache.flink.streaming.api.windowing.time.time;
导入org.apache.flink.streaming.examples.socket.socketwindwordcount.WordWithCount;
导入org.apache.flink.util.Collector;
导入org.apache.http.HttpException;
导入org.apache.http.HttpRequest;
导入org.apache.http.HttpResponse;
导入org.apache.http.entity.StringEntity;
导入org.apache.http.impl.bootstrap.HttpServer;
导入org.apache.http.impl.bootstrap.ServerBootstrap;
导入org.apache.http.protocol.HttpContext;
导入org.apache.http.protocol.HttpRequestHandler;
导入java.io.IOException;
导入java.util.concurrent.TimeUnit;
导入静态org.apache.flink.util.premissions.checkArgument;
导入静态org.apache.flink.util.premissions.checkNotNull;
公共类HttpRequestCount{
公共静态void main(字符串[]args)引发异常{
//要连接到的主机和端口
最终字符串路径;
最终国际端口;
试一试{
final ParameterTool params=ParameterTool.fromArgs(args);
path=params.has(“path”)?params.get(“path”):“*”;
端口=params.getInt(“端口”);
}捕获(例外e){
System.err.println(“未指定端口。请运行'SocketWindowWordCount'
+--path--port',其中path(*默认)
+“和端口是文本服务器的地址”);
System.err.println(“要启动简单的文本服务器,请运行'netcat-l'并”
+“在命令行中键入输入文本”);
返回;
}
//获取执行环境
最终StreamExecutionEnvironment env=StreamExecutionEnvironment.getExecutionEnvironment();
//通过连接到套接字获取输入数据
DataStream text=env.addSource(新的OneHourHttpTextStreamFunction(路径、端口));
//解析数据、对其进行分组、打开窗口并聚合计数
数据流窗口计数=文本
.flatMap(新的flatMap函数(){
@凌驾
公共void平面图(字符串值,收集器输出){
试一试{
睡眠(1000);
}捕捉(中断异常e){
//TODO自动生成的捕捉块
e、 printStackTrace();
}
for(字符串字:value.split(\\s))){
out.collect(新单词with count(单词,1L));
}
}
})
.keyBy(“word”).timeWindow(时间秒(5))
.reduce(新的ReduceFunction(){
@凌驾
公共单词WithCount reduce(单词WithCount a、单词WithCount b){
试一试{
睡眠(1000);
}捕捉(中断异常e){
//TODO自动生成的捕捉块
e、 printStackTrace();
}
使用count返回新词(a.word,a.count+b.count);
}
});
//使用单个线程打印结果,而不是并行打印
windowCounts.print().setParallelism(1);
环境执行(“Http请求计数”);
}
}
类OneHourHttpTextStreamFunction实现SourceFunction{
私有静态最终长serialVersionUID=1L;
私有最终字符串路径;
私人最终国际港口;
私有瞬态HttpServer服务器;
公共OneHourHttpTextStreamFunction(字符串路径,int端口){
checkArgument(端口>0&&port<65536,“端口超出范围”);
this.path=checkNotNull(路径,“路径不得为null”);
this.port=端口;
}
@凌驾
公共无效运行(SourceContext ctx)引发异常{
server=ServerBootstrap.bootstrap().setListenerPort(端口).registerHandler(路径,新的HttpRequestHandler()){
@凌驾
公共无效句柄(HttpRequest请求、HttpResponse代表、HttpContext上下文)抛出HttpException、IOException{
collect(req.getRequestLine().getUri());
代表设置状态代码(200);
代表setEntity(新的StringEntity(“OK”));
}
}).create();
server.start();
服务器。等待终止(1,时间单位。小时);
}
@凌驾
公开作废取消(){
server.stop();
}
}

如果您想要演示jar,请留下评论。

在JIRA票据中,有人为HTTP